[Zrouter-src-freebsd] ZRouter.org: push to FreeBSD HEAD tree
zrouter-src-freebsd at zrouter.org
zrouter-src-freebsd at zrouter.org
Wed Jul 25 14:36:09 UTC 2012
details: http://zrouter.org/hg/FreeBSD/head//rev/fc630f3c8529
changeset: 493:fc630f3c8529
user: Aleksandr Rybalko <ray at ddteam.net>
date: Wed Jul 25 16:40:53 2012 +0300
description:
Lazy update
diffstat:
head/sys/amd64/acpica/acpi_machdep.c | 4 +-
head/sys/amd64/acpica/acpi_switch.S | 177 -
head/sys/amd64/acpica/acpi_wakecode.S | 39 +-
head/sys/amd64/acpica/acpi_wakeup.c | 420 ----
head/sys/amd64/amd64/cpu_switch.S | 166 +-
head/sys/amd64/amd64/db_disasm.c | 179 +-
head/sys/amd64/amd64/fpu.c | 187 +-
head/sys/amd64/amd64/genassym.c | 9 +-
head/sys/amd64/amd64/machdep.c | 9 +-
head/sys/amd64/amd64/mem.c | 4 +-
head/sys/amd64/amd64/minidump_machdep.c | 8 +-
head/sys/amd64/amd64/mp_machdep.c | 110 +-
head/sys/amd64/amd64/pmap.c | 1197 +++++++----
head/sys/amd64/amd64/ptrace_machdep.c | 9 +-
head/sys/amd64/amd64/trap.c | 30 +-
head/sys/amd64/amd64/vm_machdep.c | 6 +-
head/sys/amd64/conf/GENERIC | 10 +-
head/sys/amd64/include/atomic.h | 76 +-
head/sys/amd64/include/cpufunc.h | 40 +-
head/sys/amd64/include/elf.h | 3 +-
head/sys/amd64/include/fpu.h | 5 +-
head/sys/amd64/include/in_cksum.h | 6 +-
head/sys/amd64/include/intr_machdep.h | 4 +-
head/sys/amd64/include/md_var.h | 3 +-
head/sys/amd64/include/pcb.h | 18 +-
head/sys/amd64/include/pcpu.h | 24 +-
head/sys/amd64/include/pmap.h | 5 +-
head/sys/amd64/include/smp.h | 3 +-
head/sys/amd64/include/vdso.h | 6 +
head/sys/amd64/include/vmparam.h | 4 +-
head/sys/amd64/linux32/linux.h | 3 +-
head/sys/amd64/linux32/linux32_dummy.c | 11 +-
head/sys/amd64/linux32/linux32_proto.h | 8 +-
head/sys/amd64/linux32/linux32_syscall.h | 4 +-
head/sys/amd64/linux32/linux32_syscalls.c | 4 +-
head/sys/amd64/linux32/linux32_sysent.c | 4 +-
head/sys/amd64/linux32/linux32_systrace_args.c | 10 +-
head/sys/amd64/linux32/syscalls.master | 6 +-
head/sys/fs/cd9660/cd9660_node.c | 5 +-
head/sys/fs/cd9660/cd9660_vfsops.c | 6 +-
head/sys/fs/devfs/devfs_vnops.c | 17 +-
head/sys/fs/ext2fs/ext2_inode.c | 6 +-
head/sys/fs/ext2fs/ext2_lookup.c | 60 +-
head/sys/fs/ext2fs/ext2_vfsops.c | 8 +-
head/sys/fs/ext2fs/ext2_vnops.c | 8 +-
head/sys/fs/hpfs/hpfs_vnops.c | 25 +-
head/sys/fs/msdosfs/denode.h | 4 +-
head/sys/fs/msdosfs/msdosfs_denode.c | 14 +-
head/sys/fs/msdosfs/msdosfs_lookup.c | 6 +-
head/sys/fs/msdosfs/msdosfs_vnops.c | 11 +-
head/sys/fs/nandfs/bmap.c | 621 ++++++
head/sys/fs/nandfs/bmap.h | 40 +
head/sys/fs/nandfs/nandfs.h | 310 +++
head/sys/fs/nandfs/nandfs_alloc.c | 364 +++
head/sys/fs/nandfs/nandfs_bmap.c | 230 ++
head/sys/fs/nandfs/nandfs_buffer.c | 83 +
head/sys/fs/nandfs/nandfs_cleaner.c | 620 ++++++
head/sys/fs/nandfs/nandfs_cpfile.c | 776 +++++++
head/sys/fs/nandfs/nandfs_dat.c | 344 +++
head/sys/fs/nandfs/nandfs_dir.c | 314 +++
head/sys/fs/nandfs/nandfs_fs.h | 565 +++++
head/sys/fs/nandfs/nandfs_ifile.c | 213 ++
head/sys/fs/nandfs/nandfs_mount.h | 50 +
head/sys/fs/nandfs/nandfs_segment.c | 1329 ++++++++++++
head/sys/fs/nandfs/nandfs_subr.c | 1120 ++++++++++
head/sys/fs/nandfs/nandfs_subr.h | 238 ++
head/sys/fs/nandfs/nandfs_sufile.c | 569 +++++
head/sys/fs/nandfs/nandfs_vfsops.c | 1590 +++++++++++++++
head/sys/fs/nandfs/nandfs_vnops.c | 2455 ++++++++++++++++++++++++
head/sys/fs/nfs/nfs_commonacl.c | 6 +-
head/sys/fs/nfsclient/nfs_clbio.c | 80 +-
head/sys/fs/nfsclient/nfs_clnode.c | 42 +-
head/sys/fs/nfsclient/nfs_clvfsops.c | 5 +-
head/sys/fs/nfsclient/nfs_clvnops.c | 23 +-
head/sys/fs/nfsclient/nfsnode.h | 3 +-
head/sys/fs/nfsserver/nfs_nfsdport.c | 9 +-
head/sys/fs/nfsserver/nfs_nfsdstate.c | 17 +-
head/sys/fs/ntfs/ntfs.h | 318 +-
head/sys/fs/ntfs/ntfs_subr.c | 170 +-
head/sys/fs/ntfs/ntfs_subr.h | 4 +-
head/sys/fs/ntfs/ntfs_vfsops.c | 84 +-
head/sys/fs/ntfs/ntfs_vnops.c | 152 +-
head/sys/fs/nullfs/null_vnops.c | 5 +-
head/sys/fs/portalfs/portal_vnops.c | 10 +-
head/sys/fs/smbfs/smbfs_node.c | 19 +-
head/sys/fs/tmpfs/tmpfs_vnops.c | 5 +-
head/sys/fs/udf/udf_vfsops.c | 4 +-
head/sys/fs/unionfs/union_subr.c | 25 +-
head/sys/fs/unionfs/union_vfsops.c | 12 +-
head/sys/fs/unionfs/union_vnops.c | 305 +-
head/sys/i386/acpica/acpi_machdep.c | 4 +-
head/sys/i386/acpica/acpi_wakecode.S | 349 +-
head/sys/i386/acpica/acpi_wakeup.c | 371 ---
head/sys/i386/conf/GENERIC | 8 +-
head/sys/i386/conf/XEN | 4 +-
head/sys/i386/i386/apic_vector.s | 22 +-
head/sys/i386/i386/bios.c | 6 +-
head/sys/i386/i386/elf_machdep.c | 7 +-
head/sys/i386/i386/genassym.c | 15 +-
head/sys/i386/i386/initcpu.c | 3 +-
head/sys/i386/i386/machdep.c | 26 +-
head/sys/i386/i386/mem.c | 4 +-
head/sys/i386/i386/minidump_machdep.c | 8 +-
head/sys/i386/i386/mp_machdep.c | 137 +-
head/sys/i386/i386/pmap.c | 416 ++-
head/sys/i386/i386/ptrace_machdep.c | 4 +-
head/sys/i386/i386/swtch.s | 111 +-
head/sys/i386/i386/trap.c | 12 +-
head/sys/i386/i386/vm86.c | 3 +-
head/sys/i386/include/apicvar.h | 5 +-
head/sys/i386/include/atomic.h | 80 +-
head/sys/i386/include/bootinfo.h | 10 +-
head/sys/i386/include/cpufunc.h | 12 +-
head/sys/i386/include/elf.h | 3 +-
head/sys/i386/include/in_cksum.h | 8 +-
head/sys/i386/include/intr_machdep.h | 4 +-
head/sys/i386/include/md_var.h | 3 +-
head/sys/i386/include/npx.h | 5 +-
head/sys/i386/include/pcb.h | 17 +-
head/sys/i386/include/pmap.h | 5 +-
head/sys/i386/include/smp.h | 7 +-
head/sys/i386/include/vdso.h | 6 +
head/sys/i386/include/vmparam.h | 5 +-
head/sys/i386/isa/npx.c | 79 +-
head/sys/i386/linux/linux.h | 3 +-
head/sys/i386/linux/linux_dummy.c | 11 +-
head/sys/i386/xen/pmap.c | 220 +-
head/sys/ia64/acpica/acpi_wakeup.c | 9 +-
head/sys/ia64/ia64/busdma_machdep.c | 14 +-
head/sys/ia64/ia64/machdep.c | 241 +-
head/sys/ia64/ia64/mp_machdep.c | 10 +-
head/sys/ia64/ia64/nexus.c | 11 +-
head/sys/ia64/ia64/physmem.c | 258 ++
head/sys/ia64/ia64/pmap.c | 81 +-
head/sys/ia64/include/_stdint.h | 8 +-
head/sys/ia64/include/_types.h | 6 +-
head/sys/ia64/include/elf.h | 3 +-
head/sys/ia64/include/in_cksum.h | 6 +-
head/sys/ia64/include/md_var.h | 13 +-
head/sys/ia64/include/param.h | 5 +-
head/sys/ia64/include/pcb.h | 6 +-
head/sys/ia64/include/pmap.h | 3 +-
head/sys/ia64/include/vdso.h | 41 +
head/sys/kern/capabilities.conf | 8 +-
head/sys/kern/dtio_kdtrace.c | 232 ++
head/sys/kern/imgact_aout.c | 15 +-
head/sys/kern/imgact_elf.c | 33 +-
head/sys/kern/imgact_gzip.c | 6 +-
head/sys/kern/init_main.c | 37 +-
head/sys/kern/init_sysent.c | 14 +-
head/sys/kern/kern_acct.c | 25 +-
head/sys/kern/kern_clock.c | 8 +-
head/sys/kern/kern_conf.c | 9 +-
head/sys/kern/kern_descrip.c | 552 ++--
head/sys/kern/kern_event.c | 21 +-
head/sys/kern/kern_exec.c | 67 +-
head/sys/kern/kern_fork.c | 13 +-
head/sys/kern/kern_jail.c | 23 +-
head/sys/kern/kern_kthread.c | 4 +-
head/sys/kern/kern_malloc.c | 8 +-
head/sys/kern/kern_proc.c | 42 +-
head/sys/kern/kern_racct.c | 7 +-
head/sys/kern/kern_rangelock.c | 246 ++
head/sys/kern/kern_sharedpage.c | 240 ++
head/sys/kern/kern_shutdown.c | 11 +-
head/sys/kern/kern_sig.c | 7 +-
head/sys/kern/kern_synch.c | 19 +-
head/sys/kern/kern_tc.c | 86 +-
head/sys/kern/kern_thr.c | 3 +-
head/sys/kern/kern_thread.c | 11 +-
head/sys/kern/kern_timeout.c | 359 +-
head/sys/kern/sched_4bsd.c | 41 +-
head/sys/kern/sched_ule.c | 40 +-
head/sys/kern/subr_bus.c | 4 +-
head/sys/kern/subr_devstat.c | 60 +-
head/sys/kern/subr_dummy_vdso_tc.c | 49 +
head/sys/kern/subr_firmware.c | 4 +-
head/sys/kern/subr_rman.c | 19 +-
head/sys/kern/subr_sleepqueue.c | 10 +-
head/sys/kern/subr_smp.c | 17 +-
head/sys/kern/subr_syscall.c | 8 +-
head/sys/kern/subr_trap.c | 3 +-
head/sys/kern/subr_turnstile.c | 12 +-
head/sys/kern/subr_witness.c | 17 +-
head/sys/kern/sys_capability.c | 6 +-
head/sys/kern/sys_generic.c | 4 +-
head/sys/kern/sys_procdesc.c | 6 +-
head/sys/kern/sys_process.c | 10 +-
head/sys/kern/syscalls.c | 4 +-
head/sys/kern/syscalls.master | 6 +-
head/sys/kern/systrace_args.c | 10 +-
head/sys/kern/tty.c | 31 +-
head/sys/kern/uipc_mqueue.c | 6 +-
head/sys/kern/uipc_socket.c | 4 +-
head/sys/kern/uipc_syscalls.c | 25 +-
head/sys/kern/uipc_usrreq.c | 4 +-
head/sys/kern/vfs_bio.c | 20 +-
head/sys/kern/vfs_default.c | 19 +-
head/sys/kern/vfs_subr.c | 15 +-
head/sys/kern/vfs_syscalls.c | 302 +-
head/sys/kern/vfs_vnops.c | 743 +++++-
head/sys/netinet/icmp_var.h | 5 +-
head/sys/netinet/if_ether.c | 15 +-
head/sys/netinet/if_ether.h | 12 +-
head/sys/netinet/igmp.c | 14 +-
head/sys/netinet/in.c | 4 +-
head/sys/netinet/in.h | 4 +-
head/sys/netinet/in_pcb.c | 6 +-
head/sys/netinet/in_pcb.h | 5 +-
head/sys/netinet/in_var.h | 8 +-
head/sys/netinet/ip.h | 27 +-
head/sys/netinet/ip_carp.c | 11 +-
head/sys/netinet/ip_fw.h | 2 +-
head/sys/netinet/ip_icmp.c | 5 +-
head/sys/netinet/ip_input.c | 11 +-
head/sys/netinet/ip_mroute.c | 5 +-
head/sys/netinet/ip_mroute.h | 3 +-
head/sys/netinet/ip_output.c | 64 +-
head/sys/netinet/ipfw/ip_dummynet.c | 4 +-
head/sys/netinet/ipfw/ip_fw_log.c | 139 +-
head/sys/netinet/ipfw/ip_fw_private.h | 2 +-
head/sys/netinet/ipfw/ip_fw_table.c | 15 +-
head/sys/netinet/libalias/alias_sctp.h | 3 +-
head/sys/netinet/libalias/libalias.3 | 16 +-
head/sys/netinet/sctp.h | 80 +-
head/sys/netinet/sctp_asconf.c | 189 +-
head/sys/netinet/sctp_asconf.h | 12 +-
head/sys/netinet/sctp_auth.c | 28 +-
head/sys/netinet/sctp_auth.h | 10 +-
head/sys/netinet/sctp_bsd_addr.c | 14 +-
head/sys/netinet/sctp_bsd_addr.h | 13 +-
head/sys/netinet/sctp_cc_functions.c | 13 +-
head/sys/netinet/sctp_constants.h | 78 +-
head/sys/netinet/sctp_crc32.c | 13 +-
head/sys/netinet/sctp_crc32.h | 14 +-
head/sys/netinet/sctp_dtrace_declare.h | 12 +-
head/sys/netinet/sctp_dtrace_define.h | 12 +-
head/sys/netinet/sctp_header.h | 27 +-
head/sys/netinet/sctp_indata.c | 170 +-
head/sys/netinet/sctp_indata.h | 22 +-
head/sys/netinet/sctp_input.c | 1134 +++++-----
head/sys/netinet/sctp_input.h | 24 +-
head/sys/netinet/sctp_lock_bsd.h | 15 +-
head/sys/netinet/sctp_os.h | 12 +-
head/sys/netinet/sctp_os_bsd.h | 45 +-
head/sys/netinet/sctp_output.c | 1250 +++--------
head/sys/netinet/sctp_output.h | 41 +-
head/sys/netinet/sctp_pcb.c | 299 +--
head/sys/netinet/sctp_pcb.h | 22 +-
head/sys/netinet/sctp_peeloff.c | 10 +-
head/sys/netinet/sctp_peeloff.h | 14 +-
head/sys/netinet/sctp_ss_functions.c | 8 +-
head/sys/netinet/sctp_structs.h | 14 +-
head/sys/netinet/sctp_sysctl.c | 21 +-
head/sys/netinet/sctp_sysctl.h | 17 +-
head/sys/netinet/sctp_timer.c | 20 +-
head/sys/netinet/sctp_timer.h | 11 +-
head/sys/netinet/sctp_uio.h | 99 +-
head/sys/netinet/sctp_usrreq.c | 180 +-
head/sys/netinet/sctp_var.h | 8 +-
head/sys/netinet/sctputil.c | 774 ++++---
head/sys/netinet/sctputil.h | 41 +-
head/sys/netinet/tcp_hostcache.c | 4 +-
head/sys/netinet/tcp_input.c | 61 +-
head/sys/netinet/tcp_lro.c | 888 +++++---
head/sys/netinet/tcp_lro.h | 123 +-
head/sys/netinet/tcp_offload.c | 209 +-
head/sys/netinet/tcp_offload.h | 364 +---
head/sys/netinet/tcp_output.c | 69 +-
head/sys/netinet/tcp_subr.c | 36 +-
head/sys/netinet/tcp_syncache.c | 147 +-
head/sys/netinet/tcp_syncache.h | 21 +-
head/sys/netinet/tcp_timer.c | 7 +-
head/sys/netinet/tcp_timewait.c | 11 +-
head/sys/netinet/tcp_usrreq.c | 77 +-
head/sys/netinet/tcp_var.h | 4 +-
head/sys/netinet/toecore.c | 575 +++++
head/sys/netinet/toecore.h | 130 +
head/sys/netinet/toedev.h | 162 -
head/sys/netinet/udp_usrreq.c | 18 +-
head/sys/pc98/conf/GENERIC | 4 +-
head/sys/pc98/include/vdso.h | 6 +
head/sys/pc98/pc98/machdep.c | 11 +-
head/sys/powerpc/aim/locore32.S | 9 +-
head/sys/powerpc/aim/locore64.S | 9 +-
head/sys/powerpc/aim/mmu_oea.c | 165 +-
head/sys/powerpc/aim/mmu_oea64.c | 186 +-
head/sys/powerpc/aim/moea64_native.c | 47 +-
head/sys/powerpc/aim/slb.c | 6 +-
head/sys/powerpc/aim/swtch32.S | 5 +-
head/sys/powerpc/aim/swtch64.S | 5 +-
head/sys/powerpc/booke/locore.S | 22 +-
head/sys/powerpc/booke/machdep.c | 82 +-
head/sys/powerpc/booke/machdep_e500.c | 158 +
head/sys/powerpc/booke/machdep_ppc4xx.c | 219 ++
head/sys/powerpc/booke/platform_bare.c | 63 +-
head/sys/powerpc/booke/pmap.c | 52 +-
head/sys/powerpc/booke/trap.c | 9 +-
head/sys/powerpc/booke/trap_subr.S | 4 +-
head/sys/powerpc/conf/DEFAULTS | 4 +-
head/sys/powerpc/conf/GENERIC | 11 +-
head/sys/powerpc/conf/GENERIC64 | 22 +-
head/sys/powerpc/conf/MPC85XX | 5 +-
head/sys/powerpc/conf/NOTES | 5 +-
head/sys/powerpc/include/_stdint.h | 8 +-
head/sys/powerpc/include/_types.h | 6 +-
head/sys/powerpc/include/atomic.h | 61 +-
head/sys/powerpc/include/cpu.h | 4 +-
head/sys/powerpc/include/cpufunc.h | 18 +-
head/sys/powerpc/include/elf.h | 5 +-
head/sys/powerpc/include/hid.h | 55 +-
head/sys/powerpc/include/in_cksum.h | 6 +-
head/sys/powerpc/include/machdep.h | 39 +
head/sys/powerpc/include/pcpu.h | 4 +-
head/sys/powerpc/include/pio.h | 57 +-
head/sys/powerpc/include/pmap.h | 19 +-
head/sys/powerpc/include/profile.h | 9 +-
head/sys/powerpc/include/psl.h | 30 +-
head/sys/powerpc/include/pte.h | 29 +-
head/sys/powerpc/include/spr.h | 228 +-
head/sys/powerpc/include/tlb.h | 86 +-
head/sys/powerpc/include/trap.h | 4 +-
head/sys/powerpc/include/ucontext.h | 8 +-
head/sys/powerpc/include/vdso.h | 41 +
head/sys/powerpc/include/vmparam.h | 4 +-
head/sys/powerpc/mpc85xx/fsl_sdhc.c | 1306 ++++++++++++
head/sys/powerpc/mpc85xx/fsl_sdhc.h | 297 ++
head/sys/powerpc/mpc85xx/i2c.c | 5 +-
head/sys/powerpc/mpc85xx/lbc.c | 303 ++-
head/sys/powerpc/mpc85xx/lbc.h | 62 +-
head/sys/powerpc/mpc85xx/mpc85xx.c | 13 +-
head/sys/powerpc/mpc85xx/nexus.c | 62 +-
head/sys/powerpc/mpc85xx/openpic_fdt.c | 93 -
head/sys/powerpc/mpc85xx/pci_fdt.c | 11 +-
head/sys/powerpc/powermac/hrowpic.c | 4 +-
head/sys/powerpc/powerpc/atomic.S | 137 -
head/sys/powerpc/powerpc/bus_machdep.c | 82 +-
head/sys/powerpc/powerpc/cpu.c | 26 +-
head/sys/powerpc/powerpc/db_trace.c | 6 +-
head/sys/powerpc/powerpc/gdb_machdep.c | 4 +-
head/sys/powerpc/powerpc/genassym.c | 28 +-
head/sys/powerpc/powerpc/mmu_if.m | 12 +-
head/sys/powerpc/powerpc/openpic_fdt.c | 93 +
head/sys/powerpc/powerpc/platform.c | 6 +-
head/sys/powerpc/powerpc/pmap_dispatch.c | 24 +-
head/sys/sparc64/conf/GENERIC | 35 +-
head/sys/sparc64/include/_stdint.h | 8 +-
head/sys/sparc64/include/_types.h | 6 +-
head/sys/sparc64/include/elf.h | 3 +-
head/sys/sparc64/include/in_cksum.h | 6 +-
head/sys/sparc64/include/intr_machdep.h | 6 +-
head/sys/sparc64/include/pcb.h | 4 +-
head/sys/sparc64/include/pmap.h | 5 +-
head/sys/sparc64/include/vdso.h | 34 +
head/sys/sparc64/sparc64/intr_machdep.c | 9 +-
head/sys/sparc64/sparc64/machdep.c | 4 +-
head/sys/sparc64/sparc64/pmap.c | 100 +-
head/sys/sparc64/sparc64/tsb.c | 5 +-
358 files changed, 25573 insertions(+), 9531 deletions(-)
diffs (53825 lines):
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/acpica/acpi_machdep.c
--- a/head/sys/amd64/acpica/acpi_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/acpica/acpi_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/amd64/acpica/acpi_machdep.c 235556 2012-05-17 17:58:53Z jhb $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -44,8 +44,6 @@
#include <machine/nexusvar.h>
-SYSCTL_DECL(_debug_acpi);
-
int acpi_resume_beep;
TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
SYSCTL_INT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/acpica/acpi_switch.S
--- a/head/sys/amd64/acpica/acpi_switch.S Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-/*-
- * Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
- * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
- * Copyright (c) 2008-2012 Jung-uk Kim <jkim at FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: head/sys/amd64/acpica/acpi_switch.S 230958 2012-02-03 21:24:28Z jkim $
- */
-
-#include <machine/asmacros.h>
-#include <machine/specialreg.h>
-
-#include "acpi_wakedata.h"
-#include "assym.s"
-
-#define WAKEUP_CTX(member) wakeup_ ## member - wakeup_ctx(%rsi)
-
-ENTRY(acpi_restorecpu)
- /* Switch to KPML4phys. */
- movq %rdi, %cr3
-
- /* Restore GDT. */
- lgdt WAKEUP_CTX(gdt)
- jmp 1f
-1:
-
- /* Fetch PCB. */
- movq WAKEUP_CTX(pcb), %rdi
-
- /* Force kernel segment registers. */
- movl $KDSEL, %eax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %ss
- movl $KUF32SEL, %eax
- movw %ax, %fs
- movl $KUG32SEL, %eax
- movw %ax, %gs
-
- movl $MSR_FSBASE, %ecx
- movl PCB_FSBASE(%rdi), %eax
- movl 4 + PCB_FSBASE(%rdi), %edx
- wrmsr
- movl $MSR_GSBASE, %ecx
- movl PCB_GSBASE(%rdi), %eax
- movl 4 + PCB_GSBASE(%rdi), %edx
- wrmsr
- movl $MSR_KGSBASE, %ecx
- movl PCB_KGSBASE(%rdi), %eax
- movl 4 + PCB_KGSBASE(%rdi), %edx
- wrmsr
-
- /* Restore EFER. */
- movl $MSR_EFER, %ecx
- movl WAKEUP_CTX(efer), %eax
- wrmsr
-
- /* Restore fast syscall stuff. */
- movl $MSR_STAR, %ecx
- movl WAKEUP_CTX(star), %eax
- movl 4 + WAKEUP_CTX(star), %edx
- wrmsr
- movl $MSR_LSTAR, %ecx
- movl WAKEUP_CTX(lstar), %eax
- movl 4 + WAKEUP_CTX(lstar), %edx
- wrmsr
- movl $MSR_CSTAR, %ecx
- movl WAKEUP_CTX(cstar), %eax
- movl 4 + WAKEUP_CTX(cstar), %edx
- wrmsr
- movl $MSR_SF_MASK, %ecx
- movl WAKEUP_CTX(sfmask), %eax
- wrmsr
-
- /* Restore CR0 except for FPU mode. */
- movq PCB_CR0(%rdi), %rax
- andq $~(CR0_EM | CR0_TS), %rax
- movq %rax, %cr0
-
- /* Restore CR2 and CR4. */
- movq PCB_CR2(%rdi), %rax
- movq %rax, %cr2
- movq PCB_CR4(%rdi), %rax
- movq %rax, %cr4
-
- /* Restore descriptor tables. */
- lidt PCB_IDT(%rdi)
- lldt PCB_LDT(%rdi)
-
-#define SDT_SYSTSS 9
-#define SDT_SYSBSY 11
-
- /* Clear "task busy" bit and reload TR. */
- movq PCPU(TSS), %rax
- andb $(~SDT_SYSBSY | SDT_SYSTSS), 5(%rax)
- movw PCB_TR(%rdi), %ax
- ltr %ax
-
-#undef SDT_SYSTSS
-#undef SDT_SYSBSY
-
- /* Restore debug registers. */
- movq PCB_DR0(%rdi), %rax
- movq %rax, %dr0
- movq PCB_DR1(%rdi), %rax
- movq %rax, %dr1
- movq PCB_DR2(%rdi), %rax
- movq %rax, %dr2
- movq PCB_DR3(%rdi), %rax
- movq %rax, %dr3
- movq PCB_DR6(%rdi), %rax
- movq %rax, %dr6
- movq PCB_DR7(%rdi), %rax
- movq %rax, %dr7
-
- /* Restore FPU state. */
- fninit
- movq WAKEUP_CTX(fpusave), %rbx
- movq WAKEUP_CTX(xsmask), %rax
- testq %rax, %rax
- jz 1f
- movq %rax, %rdx
- shrq $32, %rdx
- movl $XCR0, %ecx
-/* xsetbv */
- .byte 0x0f, 0x01, 0xd1
-/* xrstor (%rbx) */
- .byte 0x0f, 0xae, 0x2b
- jmp 2f
-1:
- fxrstor (%rbx)
-2:
-
- /* Reload CR0. */
- movq PCB_CR0(%rdi), %rax
- movq %rax, %cr0
-
- /* Restore other callee saved registers. */
- movq PCB_R15(%rdi), %r15
- movq PCB_R14(%rdi), %r14
- movq PCB_R13(%rdi), %r13
- movq PCB_R12(%rdi), %r12
- movq PCB_RBP(%rdi), %rbp
- movq PCB_RSP(%rdi), %rsp
- movq PCB_RBX(%rdi), %rbx
-
- /* Restore return address. */
- movq PCB_RIP(%rdi), %rax
- movq %rax, (%rsp)
-
- /* Indicate the CPU is resumed. */
- xorl %eax, %eax
- movl %eax, WAKEUP_CTX(cpu)
-
- ret
-END(acpi_restorecpu)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/acpica/acpi_wakecode.S
--- a/head/sys/amd64/acpica/acpi_wakecode.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/acpica/acpi_wakecode.S Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/acpica/acpi_wakecode.S 231787 2012-02-15 22:10:33Z jkim $
+ * $FreeBSD: head/sys/amd64/acpica/acpi_wakecode.S 237037 2012-06-13 22:53:56Z jkim $
*/
#include <machine/asmacros.h>
@@ -219,10 +219,14 @@
mov $bootdata64 - bootgdt, %eax
mov %ax, %ds
- /* Restore arguments and return. */
- movq wakeup_kpml4 - wakeup_start(%rbx), %rdi
- movq wakeup_ctx - wakeup_start(%rbx), %rsi
- movq wakeup_retaddr - wakeup_start(%rbx), %rax
+ /* Restore arguments. */
+ movq wakeup_pcb - wakeup_start(%rbx), %rdi
+ movq wakeup_ret - wakeup_start(%rbx), %rax
+
+ /* Restore GDT. */
+ lgdt wakeup_gdt - wakeup_start(%rbx)
+
+ /* Jump to return address. */
jmp *%rax
.data
@@ -268,34 +272,11 @@
.long bootgdt - wakeup_start /* Offset plus %ds << 4 */
ALIGN_DATA
-wakeup_retaddr:
- .quad 0
-wakeup_kpml4:
- .quad 0
-
-wakeup_ctx:
- .quad 0
wakeup_pcb:
.quad 0
-wakeup_fpusave:
+wakeup_ret:
.quad 0
wakeup_gdt:
.word 0
.quad 0
-
- ALIGN_DATA
-wakeup_efer:
- .quad 0
-wakeup_star:
- .quad 0
-wakeup_lstar:
- .quad 0
-wakeup_cstar:
- .quad 0
-wakeup_sfmask:
- .quad 0
-wakeup_xsmask:
- .quad 0
-wakeup_cpu:
- .long 0
dummy:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/acpica/acpi_wakeup.c
--- a/head/sys/amd64/acpica/acpi_wakeup.c Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,420 +0,0 @@
-/*-
- * Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
- * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
- * Copyright (c) 2003 Peter Wemm
- * Copyright (c) 2008-2012 Jung-uk Kim <jkim at FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/acpica/acpi_wakeup.c 233704 2012-03-30 17:03:06Z jkim $");
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/eventhandler.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/smp.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <machine/clock.h>
-#include <machine/intr_machdep.h>
-#include <x86/mca.h>
-#include <machine/pcb.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-#include <machine/md_var.h>
-
-#ifdef SMP
-#include <x86/apicreg.h>
-#include <machine/smp.h>
-#include <machine/vmparam.h>
-#endif
-
-#include <contrib/dev/acpica/include/acpi.h>
-
-#include <dev/acpica/acpivar.h>
-
-#include "acpi_wakecode.h"
-#include "acpi_wakedata.h"
-
-/* Make sure the code is less than a page and leave room for the stack. */
-CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
-
-extern int acpi_resume_beep;
-extern int acpi_reset_video;
-
-#ifdef SMP
-extern struct pcb **susppcbs;
-extern void **suspfpusave;
-#else
-static struct pcb **susppcbs;
-static void **suspfpusave;
-#endif
-
-int acpi_restorecpu(uint64_t, vm_offset_t);
-
-static void *acpi_alloc_wakeup_handler(void);
-static void acpi_stop_beep(void *);
-
-#ifdef SMP
-static int acpi_wakeup_ap(struct acpi_softc *, int);
-static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *);
-#endif
-
-#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
-#define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (3 * PAGE_SIZE))
-#define WAKECODE_FIXUP(offset, type, val) do { \
- type *addr; \
- addr = (type *)(WAKECODE_VADDR(sc) + offset); \
- *addr = val; \
-} while (0)
-
-static void
-acpi_stop_beep(void *arg)
-{
-
- if (acpi_resume_beep != 0)
- timer_spkr_release();
-}
-
-#ifdef SMP
-static int
-acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
-{
- int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff;
- int apic_id = cpu_apic_ids[cpu];
- int ms;
-
- WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]);
- WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[cpu]);
- WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit);
- WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
- susppcbs[cpu]->pcb_gdt.rd_base);
- WAKECODE_FIXUP(wakeup_cpu, int, cpu);
-
- /* do an INIT IPI: assert RESET */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-
- /* wait for pending status end */
- lapic_ipi_wait(-1);
-
- /* do an INIT IPI: deassert RESET */
- lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
-
- /* wait for pending status end */
- DELAY(10000); /* wait ~10mS */
- lapic_ipi_wait(-1);
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run. and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
- * will run.
- */
-
- /* do a STARTUP IPI */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
-
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
- DELAY(200); /* wait ~200uS */
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0)
- return (1); /* return SUCCESS */
- DELAY(1000);
- }
- return (0); /* return FAILURE */
-}
-
-#define WARMBOOT_TARGET 0
-#define WARMBOOT_OFF (KERNBASE + 0x0467)
-#define WARMBOOT_SEG (KERNBASE + 0x0469)
-
-#define CMOS_REG (0x70)
-#define CMOS_DATA (0x71)
-#define BIOS_RESET (0x0f)
-#define BIOS_WARM (0x0a)
-
-static void
-acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus)
-{
- uint32_t mpbioswarmvec;
- int cpu;
- u_char mpbiosreason;
-
- /* save the current value of the warm-start vector */
- mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
- outb(CMOS_REG, BIOS_RESET);
- mpbiosreason = inb(CMOS_DATA);
-
- /* setup a vector to our boot code */
- *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
- *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4;
- outb(CMOS_REG, BIOS_RESET);
- outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
-
- /* Wake up each AP. */
- for (cpu = 1; cpu < mp_ncpus; cpu++) {
- if (!CPU_ISSET(cpu, wakeup_cpus))
- continue;
- if (acpi_wakeup_ap(sc, cpu) == 0) {
- /* restore the warmstart vector */
- *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
- panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
- cpu, cpu_apic_ids[cpu]);
- }
- }
-
- /* restore the warmstart vector */
- *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
-
- outb(CMOS_REG, BIOS_RESET);
- outb(CMOS_DATA, mpbiosreason);
-}
-#endif
-
-int
-acpi_sleep_machdep(struct acpi_softc *sc, int state)
-{
-#ifdef SMP
- cpuset_t wakeup_cpus;
-#endif
- register_t rf;
- ACPI_STATUS status;
- int ret;
-
- ret = -1;
-
- if (sc->acpi_wakeaddr == 0ul)
- return (ret);
-
-#ifdef SMP
- wakeup_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &wakeup_cpus);
-#endif
-
- if (acpi_resume_beep != 0)
- timer_spkr_acquire();
-
- AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc));
-
- rf = intr_disable();
- intr_suspend();
-
- if (savectx(susppcbs[0])) {
- ctx_fpusave(suspfpusave[0]);
-#ifdef SMP
- if (!CPU_EMPTY(&wakeup_cpus) &&
- suspend_cpus(wakeup_cpus) == 0) {
- device_printf(sc->acpi_dev, "Failed to suspend APs\n");
- goto out;
- }
-#endif
-
- WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0));
- WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));
-
- WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]);
- WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[0]);
- WAKECODE_FIXUP(wakeup_gdt, uint16_t,
- susppcbs[0]->pcb_gdt.rd_limit);
- WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
- susppcbs[0]->pcb_gdt.rd_base);
- WAKECODE_FIXUP(wakeup_cpu, int, 0);
-
- /* Call ACPICA to enter the desired sleep state */
- if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
- status = AcpiEnterSleepStateS4bios();
- else
- status = AcpiEnterSleepState(state, acpi_sleep_flags);
-
- if (status != AE_OK) {
- device_printf(sc->acpi_dev,
- "AcpiEnterSleepState failed - %s\n",
- AcpiFormatException(status));
- goto out;
- }
-
- for (;;)
- ia32_pause();
- } else {
- pmap_init_pat();
- load_cr3(susppcbs[0]->pcb_cr3);
- initializecpu();
- PCPU_SET(switchtime, 0);
- PCPU_SET(switchticks, ticks);
-#ifdef SMP
- if (!CPU_EMPTY(&wakeup_cpus))
- acpi_wakeup_cpus(sc, &wakeup_cpus);
-#endif
- ret = 0;
- }
-
-out:
-#ifdef SMP
- if (!CPU_EMPTY(&wakeup_cpus))
- restart_cpus(wakeup_cpus);
-#endif
-
- mca_resume();
- intr_resume();
- intr_restore(rf);
-
- AcpiSetFirmwareWakingVector(0);
-
- if (ret == 0 && mem_range_softc.mr_op != NULL &&
- mem_range_softc.mr_op->reinit != NULL)
- mem_range_softc.mr_op->reinit(&mem_range_softc);
-
- return (ret);
-}
-
-static void *
-acpi_alloc_wakeup_handler(void)
-{
- void *wakeaddr;
- int i;
-
- /*
- * Specify the region for our wakeup code. We want it in the low 1 MB
- * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA
- * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT),
- * and ROM area (0xa0000 and above). The temporary page tables must be
- * page-aligned.
- */
- wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK, 0x500,
- 0xa0000, PAGE_SIZE, 0ul);
- if (wakeaddr == NULL) {
- printf("%s: can't alloc wake memory\n", __func__);
- return (NULL);
- }
- if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL,
- EVENTHANDLER_PRI_LAST) == NULL) {
- printf("%s: can't register event handler\n", __func__);
- contigfree(wakeaddr, 4 * PAGE_SIZE, M_DEVBUF);
- return (NULL);
- }
- susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
- suspfpusave = malloc(mp_ncpus * sizeof(void *), M_DEVBUF, M_WAITOK);
- for (i = 0; i < mp_ncpus; i++) {
- susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
- suspfpusave[i] = alloc_fpusave(M_WAITOK);
- }
-
- return (wakeaddr);
-}
-
-void
-acpi_install_wakeup_handler(struct acpi_softc *sc)
-{
- static void *wakeaddr = NULL;
- uint64_t *pt4, *pt3, *pt2;
- int i;
-
- if (wakeaddr != NULL)
- return;
-
- wakeaddr = acpi_alloc_wakeup_handler();
- if (wakeaddr == NULL)
- return;
-
- sc->acpi_wakeaddr = (vm_offset_t)wakeaddr;
- sc->acpi_wakephys = vtophys(wakeaddr);
-
- bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode));
-
- /* Patch GDT base address, ljmp targets and page table base address. */
- WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
- WAKECODE_PADDR(sc) + bootgdt);
- WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
- WAKECODE_PADDR(sc) + wakeup_32);
- WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
- WAKECODE_PADDR(sc) + wakeup_64);
- WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys);
-
- /* Save pointers to some global data. */
- WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu);
- WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys);
- WAKECODE_FIXUP(wakeup_ctx, vm_offset_t,
- WAKECODE_VADDR(sc) + wakeup_ctx);
- WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER));
- WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR));
- WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR));
- WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR));
- WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK));
- WAKECODE_FIXUP(wakeup_xsmask, uint64_t, xsave_mask);
-
- /* Build temporary page tables below realmode code. */
- pt4 = wakeaddr;
- pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t);
- pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t);
-
- /* Create the initial 1GB replicated page tables */
- for (i = 0; i < 512; i++) {
- /*
- * Each slot of the level 4 pages points
- * to the same level 3 page
- */
- pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE);
- pt4[i] |= PG_V | PG_RW | PG_U;
-
- /*
- * Each slot of the level 3 pages points
- * to the same level 2 page
- */
- pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE));
- pt3[i] |= PG_V | PG_RW | PG_U;
-
- /* The level 2 page slots are mapped with 2MB pages for 1GB. */
- pt2[i] = i * (2 * 1024 * 1024);
- pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
- }
-
- if (bootverbose)
- device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n",
- (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys);
-}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/cpu_switch.S
--- a/head/sys/amd64/amd64/cpu_switch.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/cpu_switch.S Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/amd64/cpu_switch.S 232226 2012-02-27 17:28:22Z jhb $
+ * $FreeBSD: head/sys/amd64/amd64/cpu_switch.S 238450 2012-07-14 15:48:30Z kib $
*/
#include <machine/asmacros.h>
@@ -122,8 +122,10 @@
1: movq %rdx,%rcx
movl xsave_mask,%eax
movl xsave_mask+4,%edx
-/* xsave (%r8) */
- .byte 0x41,0x0f,0xae,0x20
+ .globl ctx_switch_xsave
+ctx_switch_xsave:
+ /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
+ xsave (%r8)
movq %rcx,%rdx
2: smsw %ax
orb $CR0_TS,%al
@@ -357,6 +359,30 @@
rdmsr
movl %eax,PCB_KGSBASE(%rdi)
movl %edx,PCB_KGSBASE+4(%rdi)
+ movl $MSR_EFER,%ecx
+ rdmsr
+ movl %eax,PCB_EFER(%rdi)
+ movl %edx,PCB_EFER+4(%rdi)
+ movl $MSR_STAR,%ecx
+ rdmsr
+ movl %eax,PCB_STAR(%rdi)
+ movl %edx,PCB_STAR+4(%rdi)
+ movl $MSR_LSTAR,%ecx
+ rdmsr
+ movl %eax,PCB_LSTAR(%rdi)
+ movl %edx,PCB_LSTAR+4(%rdi)
+ movl $MSR_CSTAR,%ecx
+ rdmsr
+ movl %eax,PCB_CSTAR(%rdi)
+ movl %edx,PCB_CSTAR+4(%rdi)
+ movl $MSR_SF_MASK,%ecx
+ rdmsr
+ movl %eax,PCB_SFMASK(%rdi)
+ movl %edx,PCB_SFMASK+4(%rdi)
+ movl xsave_mask,%eax
+ movl %eax,PCB_XSMASK(%rdi)
+ movl xsave_mask+4,%eax
+ movl %eax,PCB_XSMASK+4(%rdi)
sgdt PCB_GDT(%rdi)
sidt PCB_IDT(%rdi)
@@ -370,6 +396,140 @@
END(savectx)
/*
+ * resumectx(pcb)
+ * Resuming processor state from pcb.
+ */
+ENTRY(resumectx)
+ /* Switch to KPML4phys. */
+ movq KPML4phys,%rax
+ movq %rax,%cr3
+
+ /* Force kernel segment registers. */
+ movl $KDSEL,%eax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ movl $KUF32SEL,%eax
+ movw %ax,%fs
+ movl $KUG32SEL,%eax
+ movw %ax,%gs
+
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%rdi),%eax
+ movl 4 + PCB_FSBASE(%rdi),%edx
+ wrmsr
+ movl $MSR_GSBASE,%ecx
+ movl PCB_GSBASE(%rdi),%eax
+ movl 4 + PCB_GSBASE(%rdi),%edx
+ wrmsr
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_KGSBASE(%rdi),%eax
+ movl 4 + PCB_KGSBASE(%rdi),%edx
+ wrmsr
+
+ /* Restore EFER. */
+ movl $MSR_EFER,%ecx
+ movl PCB_EFER(%rdi),%eax
+ wrmsr
+
+ /* Restore fast syscall stuff. */
+ movl $MSR_STAR,%ecx
+ movl PCB_STAR(%rdi),%eax
+ movl 4 + PCB_STAR(%rdi),%edx
+ wrmsr
+ movl $MSR_LSTAR,%ecx
+ movl PCB_LSTAR(%rdi),%eax
+ movl 4 + PCB_LSTAR(%rdi),%edx
+ wrmsr
+ movl $MSR_CSTAR,%ecx
+ movl PCB_CSTAR(%rdi),%eax
+ movl 4 + PCB_CSTAR(%rdi),%edx
+ wrmsr
+ movl $MSR_SF_MASK,%ecx
+ movl PCB_SFMASK(%rdi),%eax
+ wrmsr
+
+ /* Restore CR0 except for FPU mode. */
+ movq PCB_CR0(%rdi),%rax
+ andq $~(CR0_EM | CR0_TS),%rax
+ movq %rax,%cr0
+
+ /* Restore CR2, CR4 and CR3. */
+ movq PCB_CR2(%rdi),%rax
+ movq %rax,%cr2
+ movq PCB_CR4(%rdi),%rax
+ movq %rax,%cr4
+ movq PCB_CR3(%rdi),%rax
+ movq %rax,%cr3
+
+ /* Restore descriptor tables. */
+ lidt PCB_IDT(%rdi)
+ lldt PCB_LDT(%rdi)
+
+#define SDT_SYSTSS 9
+#define SDT_SYSBSY 11
+
+ /* Clear "task busy" bit and reload TR. */
+ movq PCPU(TSS),%rax
+ andb $(~SDT_SYSBSY | SDT_SYSTSS),5(%rax)
+ movw PCB_TR(%rdi),%ax
+ ltr %ax
+
+#undef SDT_SYSTSS
+#undef SDT_SYSBSY
+
+ /* Restore debug registers. */
+ movq PCB_DR0(%rdi),%rax
+ movq %rax,%dr0
+ movq PCB_DR1(%rdi),%rax
+ movq %rax,%dr1
+ movq PCB_DR2(%rdi),%rax
+ movq %rax,%dr2
+ movq PCB_DR3(%rdi),%rax
+ movq %rax,%dr3
+ movq PCB_DR6(%rdi),%rax
+ movq %rax,%dr6
+ movq PCB_DR7(%rdi),%rax
+ movq %rax,%dr7
+
+ /* Restore FPU state. */
+ fninit
+ movq PCB_FPUSUSPEND(%rdi),%rbx
+ movq PCB_XSMASK(%rdi),%rax
+ testq %rax,%rax
+ jz 1f
+ movq %rax,%rdx
+ shrq $32,%rdx
+ movl $XCR0,%ecx
+ xsetbv
+ xrstor (%rbx)
+ jmp 2f
+1:
+ fxrstor (%rbx)
+2:
+
+ /* Reload CR0. */
+ movq PCB_CR0(%rdi),%rax
+ movq %rax,%cr0
+
+ /* Restore other callee saved registers. */
+ movq PCB_R15(%rdi),%r15
+ movq PCB_R14(%rdi),%r14
+ movq PCB_R13(%rdi),%r13
+ movq PCB_R12(%rdi),%r12
+ movq PCB_RBP(%rdi),%rbp
+ movq PCB_RSP(%rdi),%rsp
+ movq PCB_RBX(%rdi),%rbx
+
+ /* Restore return address. */
+ movq PCB_RIP(%rdi),%rax
+ movq %rax,(%rsp)
+
+ xorl %eax,%eax
+ ret
+END(resumectx)
+
+/*
* Wrapper around fpusave to care about TS0_CR.
*/
ENTRY(ctx_fpusave)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/db_disasm.c
--- a/head/sys/amd64/amd64/db_disasm.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/db_disasm.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,12 +25,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/db_disasm.c 238166 2012-07-06 14:25:59Z jhb $");
/*
* Instruction disassembler.
*/
#include <sys/param.h>
+#include <sys/libkern.h>
#include <ddb/ddb.h>
#include <ddb/db_access.h>
@@ -47,7 +48,9 @@
#define DBLR 5
#define EXTR 6
#define SDEP 7
-#define NONE 8
+#define ADEP 8
+#define ESC 9
+#define NONE 10
/*
* REX prefix and bits
@@ -67,6 +70,7 @@
#define Eb 4 /* address, byte size */
#define R 5 /* register, in 'reg' field */
#define Rw 6 /* word register, in 'reg' field */
+#define Rq 39 /* quad register, in 'reg' field */
#define Ri 7 /* register in instruction */
#define S 8 /* segment reg, in 'reg' field */
#define Si 9 /* segment reg, in instruction */
@@ -120,6 +124,45 @@
(or pointer to table) */
};
+static const struct inst db_inst_0f388x[] = {
+/*80*/ { "", TRUE, SDEP, op2(E, Rq), "invept" },
+/*81*/ { "", TRUE, SDEP, op2(E, Rq), "invvpid" },
+/*82*/ { "", FALSE, NONE, 0, 0 },
+/*83*/ { "", FALSE, NONE, 0, 0 },
+/*84*/ { "", FALSE, NONE, 0, 0 },
+/*85*/ { "", FALSE, NONE, 0, 0 },
+/*86*/ { "", FALSE, NONE, 0, 0 },
+/*87*/ { "", FALSE, NONE, 0, 0 },
+
+/*88*/ { "", FALSE, NONE, 0, 0 },
+/*89*/ { "", FALSE, NONE, 0, 0 },
+/*8a*/ { "", FALSE, NONE, 0, 0 },
+/*8b*/ { "", FALSE, NONE, 0, 0 },
+/*8c*/ { "", FALSE, NONE, 0, 0 },
+/*8d*/ { "", FALSE, NONE, 0, 0 },
+/*8e*/ { "", FALSE, NONE, 0, 0 },
+/*8f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst * const db_inst_0f38[] = {
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ db_inst_0f388x,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+};
+
static const char * const db_Grp6[] = {
"sldt",
"str",
@@ -160,8 +203,8 @@
"",
"",
"",
- "",
- ""
+ "vmptrld",
+ "vmptrst"
};
static const char * const db_Grp15[] = {
@@ -169,9 +212,9 @@
"fxrstor",
"ldmxcsr",
"stmxcsr",
- "",
- "",
- "",
+ "xsave",
+ "xrstor",
+ "xsaveopt",
"clflush"
};
@@ -236,7 +279,7 @@
/*36*/ { "", FALSE, NONE, 0, 0 },
/*37*/ { "getsec",FALSE, NONE, 0, 0 },
-/*38*/ { "", FALSE, NONE, 0, 0 },
+/*38*/ { "", FALSE, ESC, 0, db_inst_0f38 },
/*39*/ { "", FALSE, NONE, 0, 0 },
/*3a*/ { "", FALSE, NONE, 0, 0 },
/*3b*/ { "", FALSE, NONE, 0, 0 },
@@ -266,6 +309,26 @@
/*4f*/ { "cmovnle",TRUE, NONE, op2(E, R), 0 },
};
+static const struct inst db_inst_0f7x[] = {
+/*70*/ { "", FALSE, NONE, 0, 0 },
+/*71*/ { "", FALSE, NONE, 0, 0 },
+/*72*/ { "", FALSE, NONE, 0, 0 },
+/*73*/ { "", FALSE, NONE, 0, 0 },
+/*74*/ { "", FALSE, NONE, 0, 0 },
+/*75*/ { "", FALSE, NONE, 0, 0 },
+/*76*/ { "", FALSE, NONE, 0, 0 },
+/*77*/ { "", FALSE, NONE, 0, 0 },
+
+/*78*/ { "vmread", TRUE, NONE, op2(Rq, E), 0 },
+/*79*/ { "vmwrite",TRUE, NONE, op2(E, Rq), 0 },
+/*7a*/ { "", FALSE, NONE, 0, 0 },
+/*7b*/ { "", FALSE, NONE, 0, 0 },
+/*7c*/ { "", FALSE, NONE, 0, 0 },
+/*7d*/ { "", FALSE, NONE, 0, 0 },
+/*7e*/ { "", FALSE, NONE, 0, 0 },
+/*7f*/ { "", FALSE, NONE, 0, 0 },
+};
+
static const struct inst db_inst_0f8x[] = {
/*80*/ { "jo", FALSE, NONE, op1(Dl), 0 },
/*81*/ { "jno", FALSE, NONE, op1(Dl), 0 },
@@ -373,7 +436,7 @@
db_inst_0f4x,
0,
0,
- 0,
+ db_inst_0f7x,
db_inst_0f8x,
db_inst_0f9x,
db_inst_0fax,
@@ -582,7 +645,7 @@
/*0c*/ { "or", FALSE, BYTE, op2(I, A), 0 },
/*0d*/ { "or", FALSE, LONG, op2(I, A), 0 },
/*0e*/ { "push", FALSE, NONE, op1(Si), 0 },
-/*0f*/ { "", FALSE, NONE, 0, 0 },
+/*0f*/ { "", FALSE, ESC, 0, db_inst_0f },
/*10*/ { "adc", TRUE, BYTE, op2(R, E), 0 },
/*11*/ { "adc", TRUE, LONG, op2(R, E), 0 },
@@ -738,8 +801,8 @@
/*96*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
/*97*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
-/*98*/ { "cbw", FALSE, SDEP, 0, "cwde" }, /* cbw/cwde */
-/*99*/ { "cwd", FALSE, SDEP, 0, "cdq" }, /* cwd/cdq */
+/*98*/ { "cwde", FALSE, SDEP, 0, "cbw" },
+/*99*/ { "cdq", FALSE, SDEP, 0, "cwd" },
/*9a*/ { "lcall", FALSE, NONE, op1(OS), 0 },
/*9b*/ { "wait", FALSE, NONE, 0, 0 },
/*9c*/ { "pushf", FALSE, LONG, 0, 0 },
@@ -822,7 +885,7 @@
/*e0*/ { "loopne",FALSE, NONE, op1(Db), 0 },
/*e1*/ { "loope", FALSE, NONE, op1(Db), 0 },
/*e2*/ { "loop", FALSE, NONE, op1(Db), 0 },
-/*e3*/ { "jcxz", FALSE, SDEP, op1(Db), "jecxz" },
+/*e3*/ { "jrcxz", FALSE, ADEP, op1(Db), "jecxz" },
/*e4*/ { "in", FALSE, BYTE, op2(Ib, A), 0 },
/*e5*/ { "in", FALSE, LONG, op2(Ib, A) , 0 },
/*e6*/ { "out", FALSE, BYTE, op2(A, Ib), 0 },
@@ -1208,14 +1271,6 @@
if (prefix) {
get_value_inc(inst, loc, 1, FALSE);
}
- if (rep == TRUE) {
- if (inst == 0x90) {
- db_printf("pause\n");
- return (loc);
- }
- db_printf("repe "); /* XXX repe VS rep */
- rep = FALSE;
- }
} while (prefix);
if (inst >= 0xd8 && inst <= 0xdf) {
@@ -1224,9 +1279,10 @@
return (loc);
}
- if (inst == 0x0f) {
+ ip = &db_inst_table[inst];
+ while (ip->i_size == ESC) {
get_value_inc(inst, loc, 1, FALSE);
- ip = db_inst_0f[inst>>4];
+ ip = ((const struct inst * const *)ip->i_extra)[inst>>4];
if (ip == 0) {
ip = &db_bad_inst;
}
@@ -1234,8 +1290,6 @@
ip = &ip[inst&0xf];
}
}
- else
- ip = &db_inst_table[inst];
if (ip->i_has_modrm) {
get_value_inc(regmodrm, loc, 1, FALSE);
@@ -1269,6 +1323,26 @@
/* Special cases that don't fit well in the tables. */
if (ip->i_extra == db_Grp7 && f_mod(rex, regmodrm) == 3) {
switch (regmodrm) {
+ case 0xc1:
+ i_name = "vmcall";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc2:
+ i_name = "vmlaunch";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc3:
+ i_name = "vmresume";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc4:
+ i_name = "vmxoff";
+ i_size = NONE;
+ i_mode = 0;
+ break;
case 0xc8:
i_name = "monitor";
i_size = NONE;
@@ -1279,11 +1353,26 @@
i_size = NONE;
i_mode = 0;
break;
+ case 0xd0:
+ i_name = "xgetbv";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xd1:
+ i_name = "xsetbv";
+ i_size = NONE;
+ i_mode = 0;
+ break;
case 0xf8:
i_name = "swapgs";
i_size = NONE;
i_mode = 0;
break;
+ case 0xf9:
+ i_name = "rdtscp";
+ i_size = NONE;
+ i_mode = 0;
+ break;
}
}
if (ip->i_extra == db_Grp15 && f_mod(rex, regmodrm) == 3) {
@@ -1292,8 +1381,42 @@
i_mode = 0;
}
+ /* Handle instructions identified by mandatory prefixes. */
+ if (rep == TRUE) {
+ if (inst == 0x90) {
+ i_name = "pause";
+ i_size = NONE;
+ i_mode = 0;
+ rep = FALSE;
+ } else if (ip->i_extra == db_Grp9 && f_mod(rex, regmodrm) != 3 &&
+ f_reg(rex, regmodrm) == 0x6) {
+ i_name = "vmxon";
+ rep = FALSE;
+ }
+ }
+ if (size == WORD) {
+ if (ip->i_extra == db_Grp9 && f_mod(rex, regmodrm) != 3 &&
+ f_reg(rex, regmodrm) == 0x6) {
+ i_name = "vmclear";
+ }
+ }
+ if (rex & REX_W) {
+ if (strcmp(i_name, "cwde") == 0)
+ i_name = "cdqe";
+ else if (strcmp(i_name, "cmpxchg8b") == 0)
+ i_name = "cmpxchg16b";
+ }
+
+ if (rep == TRUE)
+ db_printf("repe "); /* XXX repe VS rep */
+
if (i_size == SDEP) {
- if (size == WORD)
+ if (size == LONG)
+ db_printf("%s", i_name);
+ else
+ db_printf("%s", (const char *)ip->i_extra);
+ } else if (i_size == ADEP) {
+ if (short_addr == FALSE)
db_printf("%s", i_name);
else
db_printf("%s", (const char *)ip->i_extra);
@@ -1366,6 +1489,10 @@
db_printf("%s", db_reg[rex != 0 ? 1 : 0][WORD][f_reg(rex, regmodrm)]);
break;
+ case Rq:
+ db_printf("%s", db_reg[rex != 0 ? 1 : 0][QUAD][f_reg(rex, regmodrm)]);
+ break;
+
case Ri:
db_printf("%s", db_reg[0][QUAD][f_rm(rex, inst)]);
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/fpu.c
--- a/head/sys/amd64/amd64/fpu.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/fpu.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/fpu.c 230766 2012-01-30 07:53:33Z kib $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/fpu.c 238671 2012-07-21 13:53:00Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -73,10 +73,7 @@
#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
-#define start_emulating() __asm __volatile( \
- "smsw %%ax; orb %0,%%al; lmsw %%ax" \
- : : "n" (CR0_TS) : "ax")
-#define stop_emulating() __asm __volatile("clts")
+#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
static __inline void
xrstor(char *addr, uint64_t mask)
@@ -85,9 +82,7 @@
low = mask;
hi = mask >> 32;
- /* xrstor (%rdi) */
- __asm __volatile(".byte 0x0f,0xae,0x2f" : :
- "a" (low), "d" (hi), "D" (addr));
+ __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
}
static __inline void
@@ -97,20 +92,8 @@
low = mask;
hi = mask >> 32;
- /* xsave (%rdi) */
- __asm __volatile(".byte 0x0f,0xae,0x27" : :
- "a" (low), "d" (hi), "D" (addr) : "memory");
-}
-
-static __inline void
-xsetbv(uint32_t reg, uint64_t val)
-{
- uint32_t low, hi;
-
- low = val;
- hi = val >> 32;
- __asm __volatile(".byte 0x0f,0x01,0xd1" : :
- "c" (reg), "a" (low), "d" (hi));
+ __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+ "memory");
}
#else /* !(__GNUCLIKE_ASM && !lint) */
@@ -123,16 +106,14 @@
void fxsave(caddr_t addr);
void fxrstor(caddr_t addr);
void ldmxcsr(u_int csr);
-void start_emulating(void);
-void stop_emulating(void);
+void stmxcsr(u_int csr);
void xrstor(char *addr, uint64_t mask);
void xsave(char *addr, uint64_t mask);
-void xsetbv(uint32_t reg, uint64_t val);
#endif /* __GNUCLIKE_ASM && !lint */
-#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
-#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
+#define start_emulating() load_cr0(rcr0() | CR0_TS)
+#define stop_emulating() clts()
CTASSERT(sizeof(struct savefpu) == 512);
CTASSERT(sizeof(struct xstate_hdr) == 64);
@@ -141,7 +122,7 @@
/*
* This requirement is to make it easier for asm code to calculate
* offset of the fpu save area from the pcb address. FPU save area
- * must by 64-bytes aligned.
+ * must be 64-byte aligned.
*/
CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
@@ -150,10 +131,16 @@
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
NULL, 1, "Floating point instructions executed in hardware");
+static int use_xsaveopt;
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static struct savefpu *fpu_initialstate;
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
void
fpusave(void *addr)
{
@@ -200,6 +187,17 @@
TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+ * to XSAVEOPT. We assume that XSAVE encoding used
+ * REX byte, and set the bit 4 of the r/m byte.
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
}
/*
@@ -238,7 +236,7 @@
if (use_xsave) {
load_cr4(rcr4() | CR4_XSAVE);
- xsetbv(XCR0, xsave_mask);
+ load_xcr(XCR0, xsave_mask);
}
/*
@@ -270,6 +268,7 @@
fpuinitstate(void *arg __unused)
{
register_t saveintr;
+ int cp[4], i, max_ext_n;
fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -291,6 +290,28 @@
*/
bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
start_emulating();
intr_restore(saveintr);
}
@@ -306,7 +327,7 @@
critical_enter();
if (curthread == PCPU_GET(fpcurthread)) {
stop_emulating();
- fpusave(PCPU_GET(curpcb)->pcb_save);
+ fpusave(curpcb->pcb_save);
start_emulating();
PCPU_SET(fpcurthread, 0);
}
@@ -492,25 +513,26 @@
};
/*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ * Read the FP status and control words, then generate si_code value
+ * for SIGFPE. The error code chosen will be one of the
+ * FPE_... macros. It will be sent as the second argument to old
+ * BSD-style signal handlers and as "siginfo_t->si_code" (second
+ * argument) to SA_SIGINFO signal handlers.
*
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now
- * depend on longjmp() restoring a usable state. Restoring the state
- * or examining it might fail if we didn't clear exceptions.
+ * Some time ago, we cleared the x87 exceptions with FNCLEX there.
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
+ * usermode code which understands the FPU hardware enough to enable
+ * the exceptions, can also handle clearing the exception state in the
+ * handler. The only consequence of not clearing the exception is the
+ * rethrow of the SIGFPE on return from the signal handler and
+ * reexecution of the corresponding instruction.
*
- * The error code chosen will be one of the FPE_... macros. It will be
- * sent as the second argument to old BSD-style signal handlers and as
- * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
- *
- * XXX the FP state is not preserved across signal handlers. So signal
- * handlers cannot afford to do FP unless they preserve the state or
- * longjmp() out. Both preserving the state and longjmp()ing may be
- * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable
- * solution for signals other than SIGFPE.
+ * For XMM traps, the exceptions were never cleared.
*/
int
-fputrap()
+fputrap_x87(void)
{
+ struct savefpu *pcb_save;
u_short control, status;
critical_enter();
@@ -521,19 +543,32 @@
* wherever they are.
*/
if (PCPU_GET(fpcurthread) != curthread) {
- control = GET_FPU_CW(curthread);
- status = GET_FPU_SW(curthread);
+ pcb_save = curpcb->pcb_save;
+ control = pcb_save->sv_env.en_cw;
+ status = pcb_save->sv_env.en_sw;
} else {
fnstcw(&control);
fnstsw(&status);
}
- if (PCPU_GET(fpcurthread) == curthread)
- fnclex();
critical_exit();
return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
+int
+fputrap_sse(void)
+{
+ u_int mxcsr;
+
+ critical_enter();
+ if (PCPU_GET(fpcurthread) != curthread)
+ mxcsr = curpcb->pcb_save->sv_env.en_mxcsr;
+ else
+ stmxcsr(&mxcsr);
+ critical_exit();
+ return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+
/*
* Implement device not available (DNA) exception
*
@@ -547,7 +582,6 @@
void
fpudna(void)
{
- struct pcb *pcb;
critical_enter();
if (PCPU_GET(fpcurthread) == curthread) {
@@ -569,26 +603,31 @@
* Record new context early in case frstor causes a trap.
*/
PCPU_SET(fpcurthread, curthread);
- pcb = PCPU_GET(curpcb);
fpu_clean_state();
- if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+ if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
/*
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
* load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
*/
- fpurestore(fpu_initialstate);
- if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
- fldcw(pcb->pcb_initial_fpucw);
- if (PCB_USER_FPU(pcb))
- set_pcb_flags(pcb,
+ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(curpcb->pcb_save);
+ if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+ fldcw(curpcb->pcb_initial_fpucw);
+ if (PCB_USER_FPU(curpcb))
+ set_pcb_flags(curpcb,
PCB_FPUINITDONE | PCB_USERFPUINITDONE);
else
- set_pcb_flags(pcb, PCB_FPUINITDONE);
+ set_pcb_flags(curpcb, PCB_FPUINITDONE);
} else
- fpurestore(pcb->pcb_save);
+ fpurestore(curpcb->pcb_save);
critical_exit();
}
@@ -614,6 +653,9 @@
fpugetregs(struct thread *td)
{
struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -631,6 +673,25 @@
return (_MC_FPOWNED_FPU);
} else {
critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+ bit = 1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
return (_MC_FPOWNED_PCB);
}
}
@@ -900,16 +961,14 @@
int
fpu_kern_thread(u_int flags)
{
- struct pcb *pcb;
- pcb = PCPU_GET(curpcb);
KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
("Only kthread may use fpu_kern_thread"));
- KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb),
+ KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
("mangled pcb_save"));
- KASSERT(PCB_USER_FPU(pcb), ("recursive call"));
+ KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
- set_pcb_flags(pcb, PCB_KERNFPU);
+ set_pcb_flags(curpcb, PCB_KERNFPU);
return (0);
}
@@ -919,5 +978,5 @@
if ((curthread->td_pflags & TDP_KTHREAD) == 0)
return (0);
- return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0);
+ return ((curpcb->pcb_flags & PCB_KERNFPU) != 0);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/genassym.c
--- a/head/sys/amd64/amd64/genassym.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/genassym.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/genassym.c 230426 2012-01-21 17:45:27Z kib $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/genassym.c 236772 2012-06-09 00:37:26Z iwasaki $");
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
@@ -157,6 +157,13 @@
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
ASSYM(PCB_USERFPU, sizeof(struct pcb));
+ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
+ASSYM(PCB_STAR, offsetof(struct pcb, pcb_star));
+ASSYM(PCB_LSTAR, offsetof(struct pcb, pcb_lstar));
+ASSYM(PCB_CSTAR, offsetof(struct pcb, pcb_cstar));
+ASSYM(PCB_SFMASK, offsetof(struct pcb, pcb_sfmask));
+ASSYM(PCB_XSMASK, offsetof(struct pcb, pcb_xsmask));
+ASSYM(PCB_FPUSUSPEND, offsetof(struct pcb, pcb_fpususpend));
ASSYM(PCB_SIZE, sizeof(struct pcb));
ASSYM(PCB_FULL_IRET, PCB_FULL_IRET);
ASSYM(PCB_DBREGS, PCB_DBREGS);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/machdep.c
--- a/head/sys/amd64/amd64/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/machdep.c 234105 2012-04-10 16:08:46Z marius $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/machdep.c 238623 2012-07-19 19:09:12Z kib $");
#include "opt_atalk.h"
#include "opt_atpic.h"
@@ -74,6 +74,7 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/memrange.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
@@ -206,6 +207,8 @@
struct mtx icu_lock;
+struct mem_range_softc mem_range_softc;
+
struct mtx dt_lock; /* lock for GDT and LDT */
static void
@@ -296,12 +299,10 @@
cpu_setregs();
-#ifdef SMP
/*
* Add BSP as an interrupt target.
*/
intr_add_cpu(0);
-#endif
}
/*
@@ -995,7 +996,7 @@
pcb->pcb_dr3 = 0;
pcb->pcb_dr6 = 0;
pcb->pcb_dr7 = 0;
- if (pcb == PCPU_GET(curpcb)) {
+ if (pcb == curpcb) {
/*
* Clear the debug registers on the running
* CPU, otherwise they will end up affecting
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/mem.c
--- a/head/sys/amd64/amd64/mem.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/mem.c Wed Jul 25 16:40:53 2012 +0300
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/mem.c 238310 2012-07-09 20:42:08Z jhb $");
/*
* Memory special file
@@ -72,8 +72,6 @@
*/
MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
-struct mem_range_softc mem_range_softc;
-
/* ARGSUSED */
int
memrw(struct cdev *dev, struct uio *uio, int flags)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/minidump_machdep.c
--- a/head/sys/amd64/amd64/minidump_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/minidump_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 230623 2012-01-27 20:18:31Z kmacy $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 236503 2012-06-03 08:01:12Z avg $");
#include "opt_pmap.h"
#include "opt_watchdog.h"
@@ -37,9 +37,7 @@
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
-#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
-#endif
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
@@ -177,9 +175,9 @@
report_progress(progress, dumpsize);
counter &= (1<<24) - 1;
}
-#ifdef SW_WATCHDOG
+
wdog_kern_pat(WD_LASTVAL);
-#endif
+
if (ptr) {
error = dump_write(di, ptr, 0, dumplo, len);
if (error)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/mp_machdep.c
--- a/head/sys/amd64/amd64/mp_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/mp_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/mp_machdep.c 234208 2012-04-13 07:18:19Z avg $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/mp_machdep.c 237037 2012-06-13 22:53:56Z jkim $");
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
@@ -100,7 +100,6 @@
struct pcb stoppcbs[MAXCPU];
struct pcb **susppcbs;
-void **suspfpusave;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
@@ -982,6 +981,60 @@
/* used as a watchpoint to signal AP startup */
cpus = mp_naps;
+ ipi_startup(apic_id, vector);
+
+ /* Wait up to 5 seconds for it to start. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (mp_naps > cpus)
+ return 1; /* return SUCCESS */
+ DELAY(1000);
+ }
+ return 0; /* return FAILURE */
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
+ &ipi_range_size, 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
/*
* first we do an INIT/RESET IPI this INIT IPI might be run, reseting
* and running the target CPU. OR this INIT IPI might be latched (P5
@@ -1032,52 +1085,8 @@
vector, apic_id);
lapic_ipi_wait(-1);
DELAY(200); /* wait ~200uS */
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return 1; /* return SUCCESS */
- DELAY(1000);
- }
- return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
- &ipi_range_size, 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
/*
* Send an IPI to specified CPU handling the bitmap logic.
*/
@@ -1415,15 +1424,17 @@
cpu = PCPU_GET(cpuid);
if (savectx(susppcbs[cpu])) {
- ctx_fpusave(suspfpusave[cpu]);
+ ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
wbinvd();
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
+ CPU_SET_ATOMIC(cpu, &suspended_cpus);
} else {
pmap_init_pat();
- load_cr3(susppcbs[cpu]->pcb_cr3);
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
+
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/* Wait for resume */
@@ -1431,7 +1442,6 @@
ia32_pause();
CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
/* Resume MCA and local APIC */
mca_resume();
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/pmap.c
--- a/head/sys/amd64/amd64/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -77,7 +77,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/pmap.c 233954 2012-04-06 16:41:19Z alc $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/pmap.c 238610 2012-07-19 05:34:19Z alc $");
/*
* Manages physical address maps.
@@ -117,6 +117,7 @@
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
@@ -167,6 +168,39 @@
#define pa_index(pa) ((pa) >> PDRSHIFT)
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
+#define NPV_LIST_LOCKS MAXCPU
+
+#define PHYS_TO_PV_LIST_LOCK(pa) \
+ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+
+#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
+ struct rwlock **_lockp = (lockp); \
+ struct rwlock *_new_lock; \
+ \
+ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
+ if (_new_lock != *_lockp) { \
+ if (*_lockp != NULL) \
+ rw_wunlock(*_lockp); \
+ *_lockp = _new_lock; \
+ rw_wlock(*_lockp); \
+ } \
+} while (0)
+
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+
+#define RELEASE_PV_LIST_LOCK(lockp) do { \
+ struct rwlock **_lockp = (lockp); \
+ \
+ if (*_lockp != NULL) { \
+ rw_wunlock(*_lockp); \
+ *_lockp = NULL; \
+ } \
+} while (0)
+
+#define VM_PAGE_TO_PV_LIST_LOCK(m) \
+ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+
struct pmap kernel_pmap_store;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
@@ -199,9 +233,22 @@
static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
/*
+ * Isolate the global pv list lock from data and other locks to prevent false
+ * sharing within the cache.
+ */
+static struct {
+ struct rwlock lock;
+ char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
+} pvh_global __aligned(CACHE_LINE_SIZE);
+
+#define pvh_global_lock pvh_global.lock
+
+/*
* Data for the pv entry allocation mechanism
*/
-static long pv_entry_count;
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static struct mtx pv_chunks_mutex;
+static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
static struct md_page *pv_table;
/*
@@ -215,11 +262,19 @@
*/
static caddr_t crashdumpmap;
+static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, boolean_t try);
-static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
-static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
-static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static int popcnt_pc_map_elem(uint64_t elem);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
+static void reserve_pv_entries(pmap_t pmap, int needed,
+ struct rwlock **lockp);
+static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
@@ -227,12 +282,14 @@
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
+ vm_offset_t va, struct rwlock **lockp);
static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
vm_offset_t va);
static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot);
+ vm_prot_t prot, struct rwlock **lockp);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
@@ -240,30 +297,32 @@
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free);
+ vm_page_t *free, struct rwlock **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
- vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free);
+ vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free,
+ struct rwlock **lockp);
static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
vm_page_t *free);
-static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
- vm_offset_t va);
-static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m);
+ vm_page_t m, struct rwlock **lockp);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
pd_entry_t newpde);
static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
-static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
-
-static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
+ struct rwlock **lockp);
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_page_t* free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
@@ -580,6 +639,11 @@
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
+
/*
* Reserve some special page table entries/VA space for temporary
* mapping of pages.
@@ -744,6 +808,17 @@
}
/*
+ * Initialize the pv chunk list mutex.
+ */
+ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+
+ /*
+ * Initialize the pool of pv list locks.
+ */
+ for (i = 0; i < NPV_LIST_LOCKS; i++)
+ rw_init(&pv_list_locks[i], "pmap pv list");
+
+ /*
* Calculate the size of the pv head table for superpages.
*/
for (i = 0; phys_avail[i + 1]; i += 2);
@@ -1625,8 +1700,10 @@
}
/*
- * this routine is called if the page table page is not
- * mapped correctly.
+ * This routine is called if the desired page table page does not exist.
+ *
+ * If page table page allocation fails, this routine may sleep before
+ * returning NULL. It sleeps only if a lock pointer was given.
*
* Note: If a page allocation fails at page table level two or three,
* one or two pages may be held during the wait, only to be released
@@ -1634,25 +1711,23 @@
* race conditions.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
vm_page_t m, pdppg, pdpg;
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
/*
* Allocate a page table page.
*/
if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if (flags & M_WAITOK) {
+ if (lockp != NULL) {
+ RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
+ rw_runlock(&pvh_global_lock);
VM_WAIT;
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
@@ -1693,7 +1768,7 @@
if ((*pml4 & PG_V) == 0) {
/* Have to allocate a new pdp, recurse */
if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
- flags) == NULL) {
+ lockp) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
@@ -1726,7 +1801,7 @@
if ((*pml4 & PG_V) == 0) {
/* Have to allocate a new pd, recurse */
if (_pmap_allocpte(pmap, NUPDE + pdpindex,
- flags) == NULL) {
+ lockp) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
@@ -1740,7 +1815,7 @@
if ((*pdp & PG_V) == 0) {
/* Have to allocate a new pd, recurse */
if (_pmap_allocpte(pmap, NUPDE + pdpindex,
- flags) == NULL) {
+ lockp) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count,
1);
@@ -1766,15 +1841,12 @@
}
static vm_page_t
-pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t pdpindex, ptepindex;
pdp_entry_t *pdpe;
vm_page_t pdpg;
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("pmap_allocpde: flags is neither M_NOWAIT nor M_WAITOK"));
retry:
pdpe = pmap_pdpe(pmap, va);
if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -1785,24 +1857,20 @@
/* Allocate a pd page. */
ptepindex = pmap_pde_pindex(va);
pdpindex = ptepindex >> NPDPEPGSHIFT;
- pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
- if (pdpg == NULL && (flags & M_WAITOK))
+ pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp);
+ if (pdpg == NULL && lockp != NULL)
goto retry;
}
return (pdpg);
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *pd;
vm_page_t m;
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
/*
* Calculate pagetable page index
*/
@@ -1818,7 +1886,7 @@
* normal 4K page.
*/
if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
- if (!pmap_demote_pde(pmap, pd, va)) {
+ if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) {
/*
* Invalidation of the 2MB page mapping may have caused
* the deallocation of the underlying PD page.
@@ -1839,8 +1907,8 @@
* Here if the pte page isn't mapped, or if it has been
* deallocated.
*/
- m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & M_WAITOK))
+ m = _pmap_allocpte(pmap, ptepindex, lockp);
+ if (m == NULL && lockp != NULL)
goto retry;
}
return (m);
@@ -1993,7 +2061,7 @@
pv_to_chunk(pv_entry_t pv)
{
- return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+ return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
@@ -2002,10 +2070,7 @@
#define PC_FREE1 0xfffffffffffffffful
#define PC_FREE2 0x000000fffffffffful
-static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
-
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
- "Current number of pv entries");
+static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
@@ -2019,80 +2084,159 @@
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
"Number of times tried to get a chunk page but failed.");
-static long pv_entry_frees, pv_entry_allocs;
+static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
"Current number of pv entry allocs");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+ "Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
- "Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
- "Current number times pmap_collect called on active queue");
#endif
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
- * another pv entry chunk. This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
+ *
+ * Returns NULL if PV entries were reclaimed from the specified pmap.
*
* We do not, however, unmap 2mpages because subsequent accesses will
* allocate per-page pv entries until repromotion occurs, thereby
* exacerbating the shortage of free pv entries.
*/
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ struct md_page *pvh;
pd_entry_t *pde;
pmap_t pmap;
pt_entry_t *pte, tpte;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
vm_offset_t va;
- vm_page_t m, free;
-
- TAILQ_FOREACH(m, &vpq->pl, pageq) {
- if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
+ vm_page_t free, m, m_pc;
+ uint64_t inuse;
+ int bit, field, freed;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
+ pmap = NULL;
+ free = m_pc = NULL;
+ TAILQ_INIT(&new_tail);
+ mtx_lock(&pv_chunks_mutex);
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && free == NULL) {
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ if (pmap != pc->pc_pmap) {
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ pmap = pc->pc_pmap;
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_LOCK(pmap);
+ } else if (pmap != locked_pmap &&
+ !PMAP_TRYLOCK(pmap)) {
+ pmap = NULL;
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
+ continue;
+ }
+ }
+
+ /*
+ * Destroy every non-wired, 4 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = bsfq(inuse);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_PS) != 0)
+ continue;
+ pte = pmap_pde_to_pte(pde, va);
+ if ((*pte & PG_W) != 0)
+ continue;
+ tpte = pte_load_clear(pte);
+ if ((tpte & PG_G) != 0)
+ pmap_invalidate_page(pmap, va);
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if ((tpte & PG_A) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ vm_page_aflag_clear(m,
+ PGA_WRITEABLE);
+ }
+ }
+ pc->pc_map[field] |= 1UL << bit;
+ pmap_unuse_pt(pmap, va, *pde, &free);
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
continue;
- TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap)
- PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
- continue;
- pmap_resident_count_dec(pmap, 1);
- pde = pmap_pde(pmap, va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
- " a 2mpage in page %p's pv list", m));
- pte = pmap_pde_to_pte(pde, va);
- tpte = pte_load_clear(pte);
- KASSERT((tpte & PG_W) == 0,
- ("pmap_collect: wired pte %#lx", tpte));
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- free = NULL;
- pmap_unuse_pt(pmap, va, *pde, &free);
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- free_pv_entry(pmap, pv);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
}
- if (TAILQ_EMPTY(&m->md.pv_list) &&
- TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
+ /* Every freed mapping is for a 4 KB page. */
+ pmap_resident_count_dec(pmap, freed);
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2) {
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ dump_drop_page(m_pc->phys_addr);
+ mtx_lock(&pv_chunks_mutex);
+ break;
+ }
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
+ /* One freed pv entry in locked_pmap is sufficient. */
+ if (pmap == locked_pmap)
+ break;
}
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ if (m_pc == NULL && free != NULL) {
+ m_pc = free;
+ free = m_pc->right;
+ /* Recycle a freed page table page. */
+ m_pc->wire_count = 1;
+ atomic_add_int(&cnt.v_wire_count, 1);
+ }
+ pmap_free_zero_pages(free);
+ return (m_pc);
}
/*
@@ -2101,15 +2245,14 @@
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
- vm_page_t m;
struct pv_chunk *pc;
int idx, field, bit;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
+ PV_STAT(atomic_add_long(&pv_entry_frees, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, 1));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
pc = pv_to_chunk(pv);
idx = pv - &pc->pc_pventry[0];
field = idx / 64;
@@ -2125,9 +2268,20 @@
return;
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
/* entire chunk is free, return it */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
@@ -2136,22 +2290,24 @@
}
/*
- * get a new pv_entry, allocating a block from the system
- * when needed.
+ * Returns a new PV entry, allocating a new PV chunk from the system when
+ * needed. If this PV chunk allocation fails and a PV list lock pointer was
+ * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
+ * returned.
+ *
+ * The given PV list lock may be released.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, boolean_t try)
+get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
- struct vpgqueues *pq;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
vm_page_t m;
+ rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- PV_STAT(pv_entry_allocs++);
- pq = NULL;
+ PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
@@ -2171,52 +2327,130 @@
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
pc_list);
}
- pv_entry_count++;
- PV_STAT(pv_entry_spare--);
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
return (pv);
}
}
/* No free items, allocate another chunk */
- m = vm_page_alloc(NULL, 0, (pq == &vm_page_queues[PQ_ACTIVE] ?
- VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
- if (try) {
+ if (lockp == NULL) {
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
- /*
- * Reclaim pv entries: At first, destroy mappings to inactive
- * pages. After that, if a pv chunk entry is still needed,
- * destroy mappings to active pages.
- */
- if (pq == NULL) {
- PV_STAT(pmap_collect_inactive++);
- pq = &vm_page_queues[PQ_INACTIVE];
- } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
- PV_STAT(pmap_collect_active++);
- pq = &vm_page_queues[PQ_ACTIVE];
- } else
- panic("get_pv_entry: allocation failed");
- pmap_collect(pmap, pq);
- goto retry;
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
}
- PV_STAT(pc_chunk_count++);
- PV_STAT(pc_chunk_allocs++);
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
dump_add_page(m->phys_addr);
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- pv_entry_count++;
- PV_STAT(pv_entry_spare += _NPCPV - 1);
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
return (pv);
}
/*
+ * Returns the number of one bits within the given PV chunk map element.
+ */
+static int
+popcnt_pc_map_elem(uint64_t elem)
+{
+ int count;
+
+ /*
+ * This simple method of counting the one bits performs well because
+ * the given element typically contains more zero bits than one bits.
+ */
+ count = 0;
+ for (; elem != 0; elem &= elem - 1)
+ count++;
+ return (count);
+}
+
+/*
+ * Ensure that the number of spare PV entries in the specified pmap meets or
+ * exceeds the given count, "needed".
+ *
+ * The given PV list lock may be released.
+ */
+static void
+reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ int avail, free;
+ vm_page_t m;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
+
+ /*
+ * Newly allocated PV chunks must be stored in a private list until
+ * the required number of PV chunks have been allocated. Otherwise,
+ * reclaim_pv_chunk() could recycle one of these chunks. In
+ * contrast, these chunks must be added to the pmap upon allocation.
+ */
+ TAILQ_INIT(&new_tail);
+retry:
+ avail = 0;
+ TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
+ if ((cpu_feature2 & CPUID2_POPCNT) == 0) {
+ free = popcnt_pc_map_elem(pc->pc_map[0]);
+ free += popcnt_pc_map_elem(pc->pc_map[1]);
+ free += popcnt_pc_map_elem(pc->pc_map[2]);
+ } else {
+ free = popcntq(pc->pc_map[0]);
+ free += popcntq(pc->pc_map[1]);
+ free += popcntq(pc->pc_map[2]);
+ }
+ if (free == 0)
+ break;
+ avail += free;
+ if (avail >= needed)
+ break;
+ }
+ for (; avail < needed; avail += _NPCPV) {
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ }
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
+ dump_add_page(m->phys_addr);
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0;
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
+ }
+ if (!TAILQ_EMPTY(&new_tail)) {
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ }
+}
+
+/*
* First find and then remove the pv entry for the specified pmap and virtual
* address from the specified pv list. Returns the pv entry if found and NULL
* otherwise. This operation can be performed on pv lists for either 4KB or
@@ -2227,7 +2461,7 @@
{
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
@@ -2243,20 +2477,26 @@
* entries for each of the 4KB page mappings.
*/
static void
-pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
{
struct md_page *pvh;
+ struct pv_chunk *pc;
pv_entry_t pv;
vm_offset_t va_last;
vm_page_t m;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ int bit, field;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the 2mpage's pv entry for this mapping to the first
- * page's pv list.
+ * page's pv list. Once this transfer begins, the pv list lock
+ * must not be released until the last pv entry is reinstantiated.
*/
pvh = pa_to_pvh(pa);
va = trunc_2mpage(va);
@@ -2265,14 +2505,37 @@
m = PHYS_TO_VM_PAGE(pa);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
/* Instantiate the remaining NPTEPG - 1 pv entries. */
+ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1));
va_last = va + NBPDR - PAGE_SIZE;
- do {
- m++;
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_pv_demote_pde: page %p is not managed", m));
- va += PAGE_SIZE;
- pmap_insert_entry(pmap, va, m);
- } while (va < va_last);
+ for (;;) {
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
+ pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare"));
+ for (field = 0; field < _NPCM; field++) {
+ while (pc->pc_map[field]) {
+ bit = bsfq(pc->pc_map[field]);
+ pc->pc_map[field] &= ~(1ul << bit);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va += PAGE_SIZE;
+ pv->pv_va = va;
+ m++;
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_pv_demote_pde: page %p is not managed", m));
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ if (va == va_last)
+ goto out;
+ }
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+out:
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1));
}
/*
@@ -2281,23 +2544,25 @@
* for the 2MB page mapping.
*/
static void
-pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
vm_offset_t va_last;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_LOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
- * Transfer the first page's pv entry for this mapping to the
- * 2mpage's pv list. Aside from avoiding the cost of a call
- * to get_pv_entry(), a transfer avoids the possibility that
- * get_pv_entry() calls pmap_collect() and that pmap_collect()
- * removes one of the mappings that is being promoted.
+ * Transfer the first page's pv entry for this mapping to the 2mpage's
+ * pv list. Aside from avoiding the cost of a call to get_pv_entry(),
+ * a transfer avoids the possibility that get_pv_entry() calls
+ * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
+ * mappings that is being promoted.
*/
m = PHYS_TO_VM_PAGE(pa);
va = trunc_2mpage(va);
@@ -2329,48 +2594,22 @@
free_pv_entry(pmap, pv);
}
-static void
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
-{
- struct md_page *pvh;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- pmap_pvh_free(&m->md, pmap, va);
- if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- if (TAILQ_EMPTY(&pvh->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- }
-}
-
/*
- * Create a pv entry for page at pa for
- * (pmap, va).
+ * Conditionally create the PV entry for a 4KB page mapping if the required
+ * memory can be allocated without resorting to reclamation.
*/
-static void
-pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+static boolean_t
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct rwlock **lockp)
{
pv_entry_t pv;
+ rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
-}
-
-/*
- * Conditionally create a pv entry.
- */
-static boolean_t
-pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
return (TRUE);
} else
@@ -2378,17 +2617,22 @@
}
/*
- * Create the pv entry for a 2MB page mapping.
+ * Conditionally create the PV entry for a 2MB page mapping if the required
+ * memory can be allocated without resorting to reclamation.
*/
static boolean_t
-pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
pvh = pa_to_pvh(pa);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
return (TRUE);
@@ -2417,6 +2661,20 @@
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
+ struct rwlock *lock;
+ boolean_t rv;
+
+ lock = NULL;
+ rv = pmap_demote_pde_locked(pmap, pde, va, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ return (rv);
+}
+
+static boolean_t
+pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp)
+{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
vm_paddr_t mptepa;
@@ -2451,7 +2709,8 @@
DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
free = NULL;
- pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
+ pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free,
+ lockp);
pmap_invalidate_page(pmap, trunc_2mpage(va));
pmap_free_zero_pages(free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
@@ -2491,6 +2750,17 @@
pmap_fill_ptp(firstpte, newpte);
/*
+ * The spare PV entries must be reserved prior to demoting the
+ * mapping, that is, prior to changing the PDE. Otherwise, the state
+ * of the PDE and the PV lists will be inconsistent, which can result
+ * in reclaim_pv_chunk() attempting to remove a PV entry from the
+ * wrong PV list and pmap_pv_demote_pde() failing to find the expected
+ * PV entry for the 2MB page mapping that is being demoted.
+ */
+ if ((oldpde & PG_MANAGED) != 0)
+ reserve_pv_entries(pmap, NPTEPG - 1, lockp);
+
+ /*
* Demote the mapping. This pmap is locked. The old PDE has
* PG_A set. If the old PDE has PG_RW set, it also has PG_M
* set. Thus, there is no danger of a race with another
@@ -2509,18 +2779,12 @@
pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
/*
- * Demote the pv entry. This depends on the earlier demotion
- * of the mapping. Specifically, the (re)creation of a per-
- * page pv entry might trigger the execution of pmap_collect(),
- * which might reclaim a newly (re)created per-page pv entry
- * and destroy the associated mapping. In order to destroy
- * the mapping, the PDE must have already changed from mapping
- * the 2mpage to referencing the page table page.
+ * Demote the PV entry.
*/
if ((oldpde & PG_MANAGED) != 0)
- pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
-
- pmap_pde_demotions++;
+ pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp);
+
+ atomic_add_long(&pmap_pde_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx"
" in pmap %p", va, pmap);
return (TRUE);
@@ -2531,7 +2795,7 @@
*/
static int
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free)
+ vm_page_t *free, struct rwlock **lockp)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -2553,6 +2817,7 @@
pmap_invalidate_page(kernel_pmap, sva);
pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
if (oldpde & PG_MANAGED) {
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
pmap_pvh_free(pvh, pmap, sva);
eva = sva + NBPDR;
@@ -2568,7 +2833,7 @@
}
}
if (pmap == kernel_pmap) {
- if (!pmap_demote_pde(pmap, pdq, sva))
+ if (!pmap_demote_pde_locked(pmap, pdq, sva, lockp))
panic("pmap_remove_pde: failed demotion");
} else {
mpte = pmap_lookup_pt_page(pmap, sva);
@@ -2590,8 +2855,9 @@
*/
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
- pd_entry_t ptepde, vm_page_t *free)
+ pd_entry_t ptepde, vm_page_t *free, struct rwlock **lockp)
{
+ struct md_page *pvh;
pt_entry_t oldpte;
vm_page_t m;
@@ -2606,7 +2872,14 @@
vm_page_dirty(m);
if (oldpte & PG_A)
vm_page_aflag_set(m, PGA_REFERENCED);
- pmap_remove_entry(pmap, m, va);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ pmap_pvh_free(&m->md, pmap, va);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -2617,6 +2890,7 @@
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
{
+ struct rwlock *lock;
pt_entry_t *pte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -2625,7 +2899,10 @@
pte = pmap_pde_to_pte(pde, va);
if ((*pte & PG_V) == 0)
return;
- pmap_remove_pte(pmap, pte, va, *pde, free);
+ lock = NULL;
+ pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
pmap_invalidate_page(pmap, va);
}
@@ -2638,6 +2915,7 @@
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
+ struct rwlock *lock;
vm_offset_t va, va_next;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
@@ -2654,7 +2932,7 @@
anyvalid = 0;
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
/*
@@ -2670,6 +2948,7 @@
}
}
+ lock = NULL;
for (; sva < eva; sva = va_next) {
if (pmap->pm_stats.resident_count == 0)
@@ -2722,9 +3001,10 @@
*/
if ((ptpaddr & PG_G) == 0)
anyvalid = 1;
- pmap_remove_pde(pmap, pde, sva, &free);
+ pmap_remove_pde(pmap, pde, sva, &free, &lock);
continue;
- } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ } else if (!pmap_demote_pde_locked(pmap, pde, sva,
+ &lock)) {
/* The large page mapping was destroyed. */
continue;
} else
@@ -2753,7 +3033,8 @@
anyvalid = 1;
else if (va == va_next)
va = sva;
- if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free)) {
+ if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free,
+ &lock)) {
sva += PAGE_SIZE;
break;
}
@@ -2761,10 +3042,12 @@
if (va != va_next)
pmap_invalidate_range(pmap, va, sva);
}
+ if (lock != NULL)
+ rw_wunlock(lock);
out:
if (anyvalid)
pmap_invalidate_all(pmap);
- vm_page_unlock_queues();
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(free);
}
@@ -2796,7 +3079,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
free = NULL;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2835,7 +3118,7 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_free_zero_pages(free);
}
@@ -2956,12 +3239,12 @@
} else {
if (!pv_lists_locked) {
pv_lists_locked = TRUE;
- if (!mtx_trylock(&vm_page_queue_mtx)) {
+ if (!rw_try_rlock(&pvh_global_lock)) {
if (anychanged)
pmap_invalidate_all(
pmap);
PMAP_UNLOCK(pmap);
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
goto resume;
}
}
@@ -3012,7 +3295,7 @@
if (anychanged)
pmap_invalidate_all(pmap);
if (pv_lists_locked)
- vm_page_unlock_queues();
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3024,7 +3307,8 @@
* identical characteristics.
*/
static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp)
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
@@ -3042,7 +3326,7 @@
setpde:
newpde = *firstpte;
if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
- pmap_pde_p_failures++;
+ atomic_add_long(&pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
@@ -3067,7 +3351,7 @@
setpte:
oldpte = *pte;
if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
- pmap_pde_p_failures++;
+ atomic_add_long(&pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
@@ -3086,7 +3370,7 @@
" in pmap %p", oldpteva, pmap);
}
if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
- pmap_pde_p_failures++;
+ atomic_add_long(&pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
@@ -3111,7 +3395,7 @@
* Promote the pv entries.
*/
if ((newpde & PG_MANAGED) != 0)
- pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
+ pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
/*
* Propagate the PAT index to its proper position.
@@ -3127,7 +3411,7 @@
else
pde_store(pde, PG_PS | newpde);
- pmap_pde_promotions++;
+ atomic_add_long(&pmap_pde_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
" in pmap %p", va, pmap);
}
@@ -3148,6 +3432,7 @@
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
vm_prot_t prot, boolean_t wired)
{
+ struct rwlock *lock;
pd_entry_t *pde;
pt_entry_t *pte;
pt_entry_t newpte, origpte;
@@ -3161,115 +3446,16 @@
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
va));
+ KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
+ va >= kmi.clean_eva,
+ ("pmap_enter: managed mapping within the clean submap"));
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
VM_OBJECT_LOCKED(m->object),
("pmap_enter: page %p is not busy", m));
-
- mpte = NULL;
-
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS)
- mpte = pmap_allocpte(pmap, va, M_WAITOK);
-
- pde = pmap_pde(pmap, va);
- if (pde != NULL && (*pde & PG_V) != 0) {
- if ((*pde & PG_PS) != 0)
- panic("pmap_enter: attempted pmap_enter on 2MB page");
- pte = pmap_pde_to_pte(pde, va);
- } else
- panic("pmap_enter: invalid page directory va=%#lx", va);
-
pa = VM_PAGE_TO_PHYS(m);
- om = NULL;
- origpte = *pte;
- opa = origpte & PG_FRAME;
-
- /*
- * Mapping has not changed, must be protection or wiring change.
- */
- if (origpte && (opa == pa)) {
- /*
- * Wiring change, just update stats. We don't worry about
- * wiring PT pages as they remain resident as long as there
- * are valid mappings in them. Hence, if a user page is wired,
- * the PT page will be also.
- */
- if (wired && ((origpte & PG_W) == 0))
- pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
- pmap->pm_stats.wired_count--;
-
- /*
- * Remove extra pte reference
- */
- if (mpte)
- mpte->wire_count--;
-
- if (origpte & PG_MANAGED) {
- om = m;
- pa |= PG_MANAGED;
- }
- goto validate;
- }
-
- pv = NULL;
-
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (origpte & PG_MANAGED) {
- om = PHYS_TO_VM_PAGE(opa);
- pv = pmap_pvh_remove(&om->md, pmap, va);
- }
- if (mpte != NULL) {
- mpte->wire_count--;
- KASSERT(mpte->wire_count > 0,
- ("pmap_enter: missing reference to page table page,"
- " va: 0x%lx", va));
- }
- } else
- pmap_resident_count_inc(pmap, 1);
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
- ("pmap_enter: managed mapping within the clean submap"));
- if (pv == NULL)
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- pa |= PG_MANAGED;
- } else if (pv != NULL)
- free_pv_entry(pmap, pv);
-
- /*
- * Increment counters
- */
- if (wired)
- pmap->pm_stats.wired_count++;
-
-validate:
- /*
- * Now validate mapping with desired protection/wiring.
- */
newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
- if ((prot & VM_PROT_WRITE) != 0) {
+ if ((prot & VM_PROT_WRITE) != 0)
newpte |= PG_RW;
- if ((newpte & PG_MANAGED) != 0)
- vm_page_aflag_set(m, PGA_WRITEABLE);
- }
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
if (wired)
@@ -3279,40 +3465,143 @@
if (pmap == kernel_pmap)
newpte |= PG_G;
+ mpte = om = NULL;
+
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+
/*
- * if the mapping or permission bits are different, we need
- * to update the pte.
+ * In the case that a page table page is not
+ * resident, we are creating it here.
*/
- if ((origpte & ~(PG_M|PG_A)) != newpte) {
- newpte |= PG_A;
- if ((access & VM_PROT_WRITE) != 0)
- newpte |= PG_M;
- if (origpte & PG_V) {
- invlva = FALSE;
- origpte = pte_load_store(pte, newpte);
- if (origpte & PG_A) {
- if (origpte & PG_MANAGED)
- vm_page_aflag_set(om, PGA_REFERENCED);
- if (opa != VM_PAGE_TO_PHYS(m) || ((origpte &
- PG_NX) == 0 && (newpte & PG_NX)))
- invlva = TRUE;
+retry:
+ pde = pmap_pde(pmap, va);
+ if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 ||
+ pmap_demote_pde_locked(pmap, pde, va, &lock))) {
+ pte = pmap_pde_to_pte(pde, va);
+ if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
+ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+ mpte->wire_count++;
+ }
+ } else if (va < VM_MAXUSER_ADDRESS) {
+ /*
+ * Here if the pte page isn't mapped, or if it has been
+ * deallocated.
+ */
+ mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), &lock);
+ goto retry;
+ } else
+ panic("pmap_enter: invalid page directory va=%#lx", va);
+
+ origpte = *pte;
+ opa = origpte & PG_FRAME;
+
+ /*
+ * Is the specified virtual address already mapped?
+ */
+ if ((origpte & PG_V) != 0) {
+ /*
+ * Wiring change, just update stats. We don't worry about
+ * wiring PT pages as they remain resident as long as there
+ * are valid mappings in them. Hence, if a user page is wired,
+ * the PT page will be also.
+ */
+ if (wired && (origpte & PG_W) == 0)
+ pmap->pm_stats.wired_count++;
+ else if (!wired && (origpte & PG_W))
+ pmap->pm_stats.wired_count--;
+
+ /*
+ * Remove the extra PT page reference.
+ */
+ if (mpte != NULL) {
+ mpte->wire_count--;
+ KASSERT(mpte->wire_count > 0,
+ ("pmap_enter: missing reference to page table page,"
+ " va: 0x%lx", va));
+ }
+
+ /*
+ * Has the mapping changed?
+ */
+ if (opa == pa) {
+ /*
+ * No, might be a protection or wiring change.
+ */
+ if ((origpte & PG_MANAGED) != 0) {
+ newpte |= PG_MANAGED;
+ om = m;
}
- if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((origpte & PG_MANAGED) != 0)
- vm_page_dirty(om);
- if ((newpte & PG_RW) == 0)
- invlva = TRUE;
- }
- if ((origpte & PG_MANAGED) != 0 &&
+ if ((origpte & ~(PG_M | PG_A)) == newpte)
+ goto unchanged;
+ goto validate;
+ } else {
+ /*
+ * Yes, fall through to validate the new mapping.
+ */
+ if ((origpte & PG_MANAGED) != 0)
+ om = PHYS_TO_VM_PAGE(opa);
+ }
+ } else {
+ /*
+ * Increment the counters.
+ */
+ if (wired)
+ pmap->pm_stats.wired_count++;
+ pmap_resident_count_inc(pmap, 1);
+ }
+
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ newpte |= PG_MANAGED;
+ pv = get_pv_entry(pmap, &lock);
+ pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ }
+
+validate:
+
+ /*
+ * Update the PTE.
+ */
+ newpte |= PG_A;
+ if ((access & VM_PROT_WRITE) != 0)
+ newpte |= PG_M;
+ if ((newpte & (PG_MANAGED | PG_RW)) == (PG_MANAGED | PG_RW))
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ if ((origpte & PG_V) != 0) {
+ invlva = FALSE;
+ origpte = pte_load_store(pte, newpte);
+ if ((origpte & PG_A) != 0 && (opa != pa ||
+ ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0)))
+ invlva = TRUE;
+ if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if ((origpte & PG_MANAGED) != 0)
+ vm_page_dirty(om);
+ if ((newpte & PG_RW) == 0)
+ invlva = TRUE;
+ }
+ if (opa != pa && (origpte & PG_MANAGED) != 0) {
+ if ((origpte & PG_A) != 0)
+ vm_page_aflag_set(om, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
+ pmap_pvh_free(&om->md, pmap, va);
+ if ((om->aflags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
vm_page_aflag_clear(om, PGA_WRITEABLE);
- if (invlva)
- pmap_invalidate_page(pmap, va);
- } else
- pte_store(pte, newpte);
- }
+ }
+ if (invlva)
+ pmap_invalidate_page(pmap, va);
+ } else
+ pte_store(pte, newpte);
+
+unchanged:
/*
* If both the page table page and the reservation are fully
@@ -3321,9 +3610,11 @@
if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
- pmap_promote_pde(pmap, pde, va);
-
- vm_page_unlock_queues();
+ pmap_promote_pde(pmap, pde, va, &lock);
+
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3334,14 +3625,15 @@
* (3) a pv entry cannot be allocated without reclaiming another pv entry.
*/
static boolean_t
-pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ struct rwlock **lockp)
{
pd_entry_t *pde, newpde;
vm_page_t free, mpde;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
+ if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return (FALSE);
@@ -3364,7 +3656,8 @@
/*
* Abort this mapping if its PV entry could not be created.
*/
- if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
+ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m),
+ lockp)) {
free = NULL;
if (pmap_unwire_pte_hold(pmap, va, mpde, &free)) {
pmap_invalidate_page(pmap, va);
@@ -3390,7 +3683,7 @@
*/
pde_store(pde, newpde);
- pmap_pde_mappings++;
+ atomic_add_long(&pmap_pde_mappings, 1);
CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
" in pmap %p", va, pmap);
return (TRUE);
@@ -3412,6 +3705,7 @@
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
+ struct rwlock *lock;
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
@@ -3420,21 +3714,24 @@
psize = atop(end - start);
mpte = NULL;
m = m_start;
- vm_page_lock_queues();
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
(VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
- pmap_enter_pde(pmap, va, m, prot))
+ pmap_enter_pde(pmap, va, m, prot, &lock))
m = &m[NBPDR / PAGE_SIZE - 1];
else
mpte = pmap_enter_quick_locked(pmap, va, m, prot,
- mpte);
+ mpte, &lock);
m = TAILQ_NEXT(m, listq);
}
- vm_page_unlock_queues();
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3450,17 +3747,21 @@
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
-
- vm_page_lock_queues();
+ struct rwlock *lock;
+
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
- (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
- vm_page_unlock_queues();
+ (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte)
+ vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
vm_page_t free;
pt_entry_t *pte;
@@ -3469,7 +3770,7 @@
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -3494,7 +3795,9 @@
/*
* If the page table page is mapped, we just increment
- * the hold count, and activate it.
+ * the hold count, and activate it. Otherwise, we
+ * attempt to allocate a page table page. If this
+ * attempt fails, we don't retry. Instead, we give up.
*/
if (ptepa && (*ptepa & PG_V) != 0) {
if (*ptepa & PG_PS)
@@ -3502,8 +3805,11 @@
mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
mpte->wire_count++;
} else {
- mpte = _pmap_allocpte(pmap, ptepindex,
- M_NOWAIT);
+ /*
+ * Pass NULL instead of the PV list lock
+ * pointer, because we don't intend to sleep.
+ */
+ mpte = _pmap_allocpte(pmap, ptepindex, NULL);
if (mpte == NULL)
return (mpte);
}
@@ -3526,7 +3832,7 @@
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0 &&
- !pmap_try_insert_pv_entry(pmap, va, m)) {
+ !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
if (mpte != NULL) {
free = NULL;
if (pmap_unwire_pte_hold(pmap, va, mpte, &free)) {
@@ -3629,7 +3935,7 @@
PMAP_LOCK(pmap);
for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
size; pa += NBPDR) {
- pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
+ pdpg = pmap_allocpde(pmap, addr, NULL);
if (pdpg == NULL) {
/*
* The creation of mappings below is only an
@@ -3647,7 +3953,7 @@
pde_store(pde, pa | PG_PS | PG_M | PG_A |
PG_U | PG_RW | PG_V);
pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
- pmap_pde_mappings++;
+ atomic_add_long(&pmap_pde_mappings, 1);
} else {
/* Continue on if the PDE is already valid. */
pdpg->wire_count--;
@@ -3673,9 +3979,9 @@
{
pd_entry_t *pde;
pt_entry_t *pte;
- boolean_t are_queues_locked;
-
- are_queues_locked = FALSE;
+ boolean_t pv_lists_locked;
+
+ pv_lists_locked = FALSE;
/*
* Wiring is not a hardware characteristic so there is no need to
@@ -3686,11 +3992,11 @@
pde = pmap_pde(pmap, va);
if ((*pde & PG_PS) != 0) {
if (!wired != ((*pde & PG_W) == 0)) {
- if (!are_queues_locked) {
- are_queues_locked = TRUE;
- if (!mtx_trylock(&vm_page_queue_mtx)) {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_rlock(&pvh_global_lock)) {
PMAP_UNLOCK(pmap);
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
goto retry;
}
}
@@ -3708,8 +4014,8 @@
atomic_clear_long(pte, PG_W);
}
out:
- if (are_queues_locked)
- vm_page_unlock_queues();
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3725,6 +4031,7 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
+ struct rwlock *lock;
vm_page_t free;
vm_offset_t addr;
vm_offset_t end_addr = src_addr + len;
@@ -3733,7 +4040,8 @@
if (dst_addr != src_addr)
return;
- vm_page_lock_queues();
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -3777,7 +4085,7 @@
continue;
if (srcptepaddr & PG_PS) {
- dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT);
+ dstmpde = pmap_allocpde(dst_pmap, addr, NULL);
if (dstmpde == NULL)
break;
pde = (pd_entry_t *)
@@ -3785,7 +4093,7 @@
pde = &pde[pmap_pde_index(addr)];
if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
- PG_PS_FRAME))) {
+ PG_PS_FRAME, &lock))) {
*pde = srcptepaddr & ~PG_W;
pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
} else
@@ -3815,14 +4123,15 @@
dstmpte->pindex == pmap_pde_pindex(addr))
dstmpte->wire_count++;
else if ((dstmpte = pmap_allocpte(dst_pmap,
- addr, M_NOWAIT)) == NULL)
+ addr, NULL)) == NULL)
goto out;
dst_pte = (pt_entry_t *)
PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
dst_pte = &dst_pte[pmap_pte_index(addr)];
if (*dst_pte == 0 &&
pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
+ PHYS_TO_VM_PAGE(ptetemp & PG_FRAME),
+ &lock)) {
/*
* Clear the wired, modified, and
* accessed (referenced) bits
@@ -3849,7 +4158,9 @@
}
}
out:
- vm_page_unlock_queues();
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -3923,6 +4234,7 @@
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct md_page *pvh;
+ struct rwlock *lock;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
@@ -3930,7 +4242,9 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
@@ -3952,7 +4266,8 @@
break;
}
}
- vm_page_unlock_queues();
+ rw_runlock(lock);
+ rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -3970,13 +4285,13 @@
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
count = pmap_pvh_wired_mappings(&m->md, count);
if ((m->flags & PG_FICTITIOUS) == 0) {
count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
count);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (count);
}
@@ -3992,7 +4307,7 @@
pt_entry_t *pte;
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
@@ -4011,15 +4326,19 @@
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
+ struct rwlock *lock;
boolean_t rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- vm_page_lock_queues();
+ rw_rlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
- vm_page_unlock_queues();
+ rw_runlock(lock);
+ rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -4041,21 +4360,23 @@
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
- int field, idx;
+ struct rwlock *lock;
int64_t bit;
uint64_t inuse, bitmask;
- int allfree;
+ int allfree, field, freed, idx;
if (pmap != PCPU_GET(curpmap)) {
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
- vm_page_lock_queues();
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
+ freed = 0;
for (field = 0; field < _NPCM; field++) {
- inuse = (~(pc->pc_map[field])) & pc_freemask[field];
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
bit = bsfq(inuse);
bitmask = 1UL << bit;
@@ -4109,10 +4430,9 @@
vm_page_dirty(m);
}
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+
/* Mark free */
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
pc->pc_map[field] |= bitmask;
if ((tpte & PG_PS) != 0) {
pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
@@ -4120,7 +4440,8 @@
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
- if (TAILQ_EMPTY(&mt->md.pv_list))
+ if ((mt->aflags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
@@ -4136,7 +4457,8 @@
} else {
pmap_resident_count_dec(pmap, 1);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list) &&
+ if ((m->aflags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
if (TAILQ_EMPTY(&pvh->pv_list))
@@ -4144,21 +4466,21 @@
}
}
pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free);
+ freed++;
}
}
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
- dump_drop_page(m->phys_addr);
- vm_page_unwire(m, 0);
- vm_page_free(m);
+ free_pv_chunk(pc);
}
}
+ if (lock != NULL)
+ rw_wunlock(lock);
pmap_invalidate_all(pmap);
- vm_page_unlock_queues();
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(free);
}
@@ -4186,11 +4508,11 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
((m->flags & PG_FICTITIOUS) == 0 &&
pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4207,7 +4529,7 @@
pmap_t pmap;
boolean_t rv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
pmap = PV_PMAP(pv);
@@ -4258,11 +4580,11 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
rv = pmap_is_referenced_pvh(&m->md) ||
((m->flags & PG_FICTITIOUS) == 0 &&
pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4278,7 +4600,7 @@
pmap_t pmap;
boolean_t rv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
pmap = PV_PMAP(pv);
@@ -4317,7 +4639,7 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4335,8 +4657,9 @@
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found"
- " a 2mpage in page %p's pv list", m));
+ KASSERT((*pde & PG_PS) == 0,
+ ("pmap_remove_write: found a 2mpage in page %p's pv list",
+ m));
pte = pmap_pde_to_pte(pde, pv->pv_va);
retry:
oldpte = *pte;
@@ -4351,7 +4674,7 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -4379,7 +4702,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4437,7 +4760,7 @@
} while ((pv = pvn) != NULL && pv != pvf);
}
out:
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rtval);
}
@@ -4467,7 +4790,7 @@
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4516,7 +4839,7 @@
}
PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -4536,7 +4859,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_reference: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4576,7 +4899,7 @@
}
PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/ptrace_machdep.c
--- a/head/sys/amd64/amd64/ptrace_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/ptrace_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/ptrace_machdep.c 232520 2012-03-04 20:24:28Z tijl $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/ptrace_machdep.c 238669 2012-07-21 13:06:37Z kib $");
#include "opt_compat.h"
@@ -50,6 +50,7 @@
switch (req) {
case PT_GETXSTATE:
+ fpugetregs(td);
savefpu = (char *)(get_pcb_user_save_td(td) + 1);
error = copyout(savefpu, addr,
cpu_max_ext_state_size - sizeof(struct savefpu));
@@ -62,8 +63,10 @@
}
savefpu = malloc(data, M_TEMP, M_WAITOK);
error = copyin(addr, savefpu, data);
- if (error == 0)
+ if (error == 0) {
+ fpugetregs(td);
error = fpusetxstate(td, savefpu, data);
+ }
free(savefpu, M_TEMP);
break;
@@ -89,11 +92,13 @@
switch (req) {
case PT_I386_GETXMMREGS:
+ fpugetregs(td);
error = copyout(get_pcb_user_save_td(td), addr,
sizeof(*fpstate));
break;
case PT_I386_SETXMMREGS:
+ fpugetregs(td);
fpstate = get_pcb_user_save_td(td);
error = copyin(addr, fpstate, sizeof(*fpstate));
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/trap.c
--- a/head/sys/amd64/amd64/trap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/trap.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/trap.c 233781 2012-04-02 15:07:22Z jhb $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/trap.c 238623 2012-07-19 19:09:12Z kib $");
/*
* AMD64 Trap and System call handling
@@ -328,7 +328,7 @@
break;
case T_ARITHTRAP: /* arithmetic trap */
- ucode = fputrap();
+ ucode = fputrap_x87();
if (ucode == -1)
goto userout;
i = SIGFPE;
@@ -442,7 +442,9 @@
break;
case T_XMMFLT: /* SIMD floating-point exception */
- ucode = 0; /* XXX */
+ ucode = fputrap_sse();
+ if (ucode == -1)
+ goto userout;
i = SIGFPE;
break;
}
@@ -518,9 +520,8 @@
frame->tf_rip = (long)fsbase_load_fault;
goto out;
}
- if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame->tf_rip =
- (long)PCPU_GET(curpcb)->pcb_onfault;
+ if (curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
goto out;
}
break;
@@ -706,7 +707,7 @@
* it normally, and panic immediately.
*/
if (!usermode && (td->td_intr_nesting_level != 0 ||
- PCPU_GET(curpcb)->pcb_onfault == NULL)) {
+ curpcb->pcb_onfault == NULL)) {
trap_fatal(frame, eva);
return (-1);
}
@@ -762,8 +763,8 @@
nogo:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
- PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
+ curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
return (0);
}
trap_fatal(frame, eva);
@@ -972,4 +973,15 @@
syscallname(td->td_proc, sa.code)));
syscallret(td, error, &sa);
+
+ /*
+ * If the user-supplied value of %rip is not a canonical
+ * address, then some CPUs will trigger a ring 0 #GP during
+ * the sysret instruction. However, the fault handler would
+ * execute in ring 0 with the user's %gs and %rsp which would
+ * not be safe. Instead, use the full return path which
+ * catches the problem safely.
+ */
+ if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/amd64/vm_machdep.c
--- a/head/sys/amd64/amd64/vm_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/amd64/vm_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/amd64/vm_machdep.c 231441 2012-02-10 21:26:25Z kib $");
+__FBSDID("$FreeBSD: head/sys/amd64/amd64/vm_machdep.c 238623 2012-07-19 19:09:12Z kib $");
#include "opt_isa.h"
#include "opt_cpu.h"
@@ -90,6 +90,10 @@
static volatile u_int cpu_reset_proxy_active;
#endif
+CTASSERT((struct thread **)OFFSETOF_CURTHREAD ==
+ &((struct pcpu *)NULL)->pc_curthread);
+CTASSERT((struct pcb **)OFFSETOF_CURPCB == &((struct pcpu *)NULL)->pc_curpcb);
+
struct savefpu *
get_pcb_user_save_td(struct thread *td)
{
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/conf/GENERIC
--- a/head/sys/amd64/conf/GENERIC Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/conf/GENERIC Wed Jul 25 16:40:53 2012 +0300
@@ -16,7 +16,7 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/amd64/conf/GENERIC 234504 2012-04-20 21:37:42Z brooks $
+# $FreeBSD: head/sys/amd64/conf/GENERIC 237901 2012-07-01 08:10:49Z delphij $
cpu HAMMER
ident GENERIC
@@ -28,6 +28,7 @@
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
+options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support
@@ -44,6 +45,7 @@
options PROCFS # Process filesystem (requires PSEUDOFS)
options PSEUDOFS # Pseudo-filesystem framework
options GEOM_PART_GPT # GUID Partition Tables.
+options GEOM_RAID # Soft RAID functionality.
options GEOM_LABEL # Provides labelization
options COMPAT_FREEBSD32 # Compatible with i386 binaries
options COMPAT_FREEBSD4 # Compatible with FreeBSD4
@@ -66,6 +68,7 @@
options MAC # TrustedBSD MAC Framework
options KDTRACE_FRAME # Ensure frames are compiled in
options KDTRACE_HOOKS # Kernel DTrace hooks
+options DDB_CTF # Kernel ELF linker loads CTF data
options INCLUDE_CONFIG_FILE # Include this file in kernel
# Debugging support. Always need this:
@@ -75,7 +78,6 @@
# For full debugger support use this instead:
options DDB # Support DDB.
options GDB # Support remote GDB.
-options DDB_CTF # kernel ELF linker loads CTF data
options DEADLKRES # Enable the deadlock resolver
options INVARIANTS # Enable calls of extra sanity checking
options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
@@ -150,6 +152,7 @@
device ips # IBM (Adaptec) ServeRAID
device mly # Mylex AcceleRAID/eXtremeRAID
device twa # 3ware 9000 series PATA/SATA RAID
+device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller
# RAID controllers
device aac # Adaptec FSA RAID
@@ -160,7 +163,6 @@
#XXX pointer/int warnings
#device pst # Promise Supertrak SX6000
device twe # 3ware ATA RAID
-device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller
# atkbdc0 controls both the keyboard and the PS/2 mouse
device atkbdc # AT keyboard controller
@@ -272,6 +274,8 @@
device ath_pci # Atheros pci/cardbus glue
device ath_hal # pci/cardbus chip support
options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors
+options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation
+options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later
device ath_rate_sample # SampleRate tx rate control for ath
#device bwi # Broadcom BCM430x/BCM431x wireless NICs.
#device bwn # Broadcom BCM43xx wireless NICs.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/atomic.h
--- a/head/sys/amd64/include/atomic.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/atomic.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/amd64/include/atomic.h 236456 2012-06-02 18:10:16Z kib $
*/
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_
@@ -81,8 +81,9 @@
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long atomic_fetchadd_long(volatile u_long *p, u_long v);
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
-u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \
+#define ATOMIC_LOAD(TYPE, LOP) \
+u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
#else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -210,37 +211,43 @@
return (v);
}
+/*
+ * We assume that a = b will do atomic loads and stores. Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels. We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence. For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+ __asm __volatile("" : : : "memory"); \
+ *p = v; \
+} \
+struct __hack
+
#if defined(_KERNEL) && !defined(SMP)
-/*
- * We assume that a = b will do atomic loads and stores. However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels. For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
- */
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
+#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
u_##TYPE tmp; \
\
tmp = *p; \
- __asm __volatile ("" : : : "memory"); \
+ __asm __volatile("" : : : "memory"); \
return (tmp); \
} \
- \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __asm __volatile ("" : : : "memory"); \
- *p = v; \
-} \
struct __hack
#else /* !(_KERNEL && !SMP) */
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
+#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
@@ -254,19 +261,6 @@
\
return (res); \
} \
- \
-/* \
- * The XCHG instruction asserts LOCK automagically. \
- */ \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __asm __volatile(SOP \
- : "=m" (*p), /* 0 */ \
- "+r" (v) /* 1 */ \
- : "m" (*p) /* 2 */ \
- : "memory"); \
-} \
struct __hack
#endif /* _KERNEL && !SMP */
@@ -293,13 +287,19 @@
ATOMIC_ASM(add, long, "addq %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v);
-ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0");
+ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int, "cmpxchgl %0,%1");
+ATOMIC_LOAD(long, "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
#undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
#ifndef WANT_FUNCTIONS
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/cpufunc.h
--- a/head/sys/amd64/include/cpufunc.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/cpufunc.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/include/cpufunc.h 232227 2012-02-27 17:28:47Z jhb $
+ * $FreeBSD: head/sys/amd64/include/cpufunc.h 238311 2012-07-09 20:55:39Z jhb $
*/
/*
@@ -107,6 +107,13 @@
}
static __inline void
+clts(void)
+{
+
+ __asm __volatile("clts");
+}
+
+static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
@@ -273,6 +280,15 @@
__asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port));
}
+static __inline u_long
+popcntq(u_long mask)
+{
+ u_long result;
+
+ __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
static __inline void
mfence(void)
{
@@ -409,6 +425,25 @@
return (data);
}
+static __inline u_long
+rxcr(u_int reg)
+{
+ u_int low, high;
+
+ __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, u_long val)
+{
+ u_int low, high;
+
+ low = val;
+ high = val >> 32;
+ __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
/*
* Global TLB flush (except for thise for pages marked PG_G)
*/
@@ -674,6 +709,9 @@
int breakpoint(void);
u_int bsfl(u_int mask);
u_int bsrl(u_int mask);
+void clflush(u_long addr);
+void clts(void);
+void cpuid_count(u_int ax, u_int cx, u_int *p);
void disable_intr(void);
void do_cpuid(u_int ax, u_int *p);
void enable_intr(void);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/elf.h
--- a/head/sys/amd64/include/elf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/elf.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/amd64/include/elf.h 237430 2012-06-22 06:38:31Z kib $
*/
#ifndef _MACHINE_ELF_H_
@@ -94,6 +94,7 @@
#define AT_NCPUS 19 /* Number of CPUs. */
#define AT_PAGESIZES 20 /* Pagesizes. */
#define AT_PAGESIZESLEN 21 /* Number of pagesizes. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
#define AT_STACKPROT 23 /* Initial stack protection. */
#define AT_COUNT 24 /* Count of defined aux entry types. */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/fpu.h
--- a/head/sys/amd64/include/fpu.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/fpu.h Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* from: @(#)npx.h 5.3 (Berkeley) 1/18/91
- * $FreeBSD: head/sys/amd64/include/fpu.h 233044 2012-03-16 20:24:30Z tijl $
+ * $FreeBSD: head/sys/amd64/include/fpu.h 238598 2012-07-18 15:43:47Z kib $
*/
/*
@@ -62,7 +62,8 @@
char *xfpustate, size_t xfpustate_size);
int fpusetxstate(struct thread *td, char *xfpustate,
size_t xfpustate_size);
-int fputrap(void);
+int fputrap_sse(void);
+int fputrap_x87(void);
void fpuuserinited(struct thread *td);
struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/in_cksum.h
--- a/head/sys/amd64/include/in_cksum.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/in_cksum.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* from tahoe: in_cksum.c 1.2 86/01/05
* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
* from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
- * $FreeBSD$
+ * $FreeBSD: head/sys/amd64/include/in_cksum.h 235941 2012-05-24 22:00:48Z bz $
*/
#ifndef _MACHINE_IN_CKSUM_H_
@@ -43,6 +43,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -69,9 +70,12 @@
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/intr_machdep.h
--- a/head/sys/amd64/include/intr_machdep.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/intr_machdep.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/include/intr_machdep.h 234207 2012-04-13 07:15:40Z avg $
+ * $FreeBSD: head/sys/amd64/include/intr_machdep.h 234989 2012-05-03 21:44:01Z attilio $
*/
#ifndef __MACHINE_INTR_MACHDEP_H__
@@ -140,9 +140,7 @@
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
-#ifdef SMP
void intr_add_cpu(u_int cpu);
-#endif
int intr_add_handler(const char *name, int vector, driver_filter_t filter,
driver_intr_t handler, void *arg, enum intr_type flags,
void **cookiep);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/md_var.h
--- a/head/sys/amd64/include/md_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/md_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/include/md_var.h 230426 2012-01-21 17:45:27Z kib $
+ * $FreeBSD: head/sys/amd64/include/md_var.h 238450 2012-07-14 15:48:30Z kib $
*/
#ifndef _MACHINE_MD_VAR_H_
@@ -57,6 +57,7 @@
extern u_int cpu_procinfo2;
extern char cpu_vendor[];
extern u_int cpu_vendor_id;
+extern char ctx_switch_xsave[];
extern char kstack[];
extern char sigcode[];
extern int szsigcode;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/pcb.h
--- a/head/sys/amd64/include/pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* from: @(#)pcb.h 5.10 (Berkeley) 5/12/91
- * $FreeBSD: head/sys/amd64/include/pcb.h 230426 2012-01-21 17:45:27Z kib $
+ * $FreeBSD: head/sys/amd64/include/pcb.h 237037 2012-06-13 22:53:56Z jkim $
*/
#ifndef _AMD64_PCB_H_
@@ -91,9 +91,20 @@
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;
+ /* model specific registers */
+ register_t pcb_efer;
+ register_t pcb_star;
+ register_t pcb_lstar;
+ register_t pcb_cstar;
+ register_t pcb_sfmask;
+ register_t pcb_xsmask;
+
+ /* fpu context for suspend/resume */
+ void *pcb_fpususpend;
+
struct savefpu *pcb_save;
- uint64_t pcb_pad[2];
+ uint64_t pcb_pad[3];
};
#ifdef _KERNEL
@@ -130,7 +141,8 @@
}
void makectx(struct trapframe *, struct pcb *);
-int savectx(struct pcb *);
+int savectx(struct pcb *) __returns_twice;
+void resumectx(struct pcb *);
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/pcpu.h
--- a/head/sys/amd64/include/pcpu.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/pcpu.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/include/pcpu.h 230260 2012-01-17 07:21:23Z kib $
+ * $FreeBSD: head/sys/amd64/include/pcpu.h 238723 2012-07-23 19:16:31Z kib $
*/
#ifndef _MACHINE_PCPU_H_
@@ -216,16 +216,36 @@
#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
+#define OFFSETOF_CURTHREAD 0
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnull-dereference"
+#endif
static __inline __pure2 struct thread *
__curthread(void)
{
struct thread *td;
- __asm("movq %%gs:0,%0" : "=r" (td));
+ __asm("movq %%gs:%1,%0" : "=r" (td)
+ : "m" (*(char *)OFFSETOF_CURTHREAD));
return (td);
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
#define curthread (__curthread())
+#define OFFSETOF_CURPCB 32
+static __inline __pure2 struct pcb *
+__curpcb(void)
+{
+ struct pcb *pcb;
+
+ __asm("movq %%gs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB));
+ return (pcb);
+}
+#define curpcb (__curpcb())
+
#define IS_BSP() (PCPU_GET(cpuid) == 0)
#else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/pmap.h
--- a/head/sys/amd64/include/pmap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/pmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
*
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
- * $FreeBSD: head/sys/amd64/include/pmap.h 222813 2011-06-07 08:46:13Z attilio $
+ * $FreeBSD: head/sys/amd64/include/pmap.h 237168 2012-06-16 18:56:19Z alc $
*/
#ifndef _MACHINE_PMAP_H_
@@ -295,7 +295,7 @@
pmap_t pc_pmap;
TAILQ_ENTRY(pv_chunk) pc_list;
uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
- uint64_t pc_spare[2];
+ TAILQ_ENTRY(pv_chunk) pc_lru;
struct pv_entry pc_pventry[_NPCPV];
};
@@ -309,6 +309,7 @@
extern vm_offset_t virtual_end;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
void pmap_bootstrap(vm_paddr_t *);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/smp.h
--- a/head/sys/amd64/include/smp.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/smp.h Wed Jul 25 16:40:53 2012 +0300
@@ -6,7 +6,7 @@
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
*
- * $FreeBSD: head/sys/amd64/include/smp.h 222853 2011-06-08 08:12:15Z avg $
+ * $FreeBSD: head/sys/amd64/include/smp.h 236938 2012-06-12 00:14:54Z iwasaki $
*
*/
@@ -59,6 +59,7 @@
void cpustop_handler(void);
void cpususpend_handler(void);
void init_secondary(void);
+void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
void ipi_cpu(int cpu, u_int ipi);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/amd64/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/vdso.h 237433 2012-06-22 07:06:40Z kib $ */
+
+#include <x86/vdso.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/include/vmparam.h
--- a/head/sys/amd64/include/vmparam.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/include/vmparam.h Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91
- * $FreeBSD: head/sys/amd64/include/vmparam.h 221855 2011-05-13 19:35:01Z mdf $
+ * $FreeBSD: head/sys/amd64/include/vmparam.h 234743 2012-04-27 22:27:21Z rmh $
*/
@@ -54,7 +54,7 @@
*/
#define MAXTSIZ (128UL*1024*1024) /* max text size */
#ifndef DFLDSIZ
-#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */
+#define DFLDSIZ (32768UL*1024*1024) /* initial data size limit */
#endif
#ifndef MAXDSIZ
#define MAXDSIZ (32768UL*1024*1024) /* max data size */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux.h
--- a/head/sys/amd64/linux32/linux.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/amd64/linux32/linux.h 230132 2012-01-15 13:23:18Z uqs $
+ * $FreeBSD: head/sys/amd64/linux32/linux.h 235063 2012-05-05 19:42:38Z netchild $
*/
#ifndef _AMD64_LINUX_H_
@@ -42,6 +42,7 @@
#define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name)
#define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)td->td_proc->p_pid
#define LMSG(fmt) "linux(%ld): "fmt"\n", (long)td->td_proc->p_pid
+#define LINUX_DTRACE linuxulator32
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_LINUX);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_dummy.c
--- a/head/sys/amd64/linux32/linux32_dummy.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_dummy.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,16 +27,25 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_dummy.c 234352 2012-04-16 21:22:02Z jkim $");
+__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_dummy.c 235063 2012-05-05 19:42:38Z netchild $");
+
+#include "opt_compat.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
+#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_util.h>
+/* DTrace init */
+LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
+
DUMMY(stime);
DUMMY(olduname);
DUMMY(syslog);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_proto.h
--- a/head/sys/amd64/linux32/linux32_proto.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_proto.h Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call prototypes.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/amd64/linux32/linux32_proto.h 234360 2012-04-16 23:17:29Z jkim $
- * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 234359 2012-04-16 23:16:18Z jkim
+ * $FreeBSD: head/sys/amd64/linux32/linux32_proto.h 236027 2012-05-25 21:52:57Z ed $
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
#ifndef _LINUX_SYSPROTO_H_
@@ -60,8 +60,8 @@
};
struct linux_execve_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char argp_l_[PADL_(u_int32_t *)]; u_int32_t * argp; char argp_r_[PADR_(u_int32_t *)];
- char envp_l_[PADL_(u_int32_t *)]; u_int32_t * envp; char envp_r_[PADR_(u_int32_t *)];
+ char argp_l_[PADL_(uint32_t *)]; uint32_t * argp; char argp_r_[PADR_(uint32_t *)];
+ char envp_l_[PADL_(uint32_t *)]; uint32_t * envp; char envp_r_[PADR_(uint32_t *)];
};
struct linux_chdir_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_syscall.h
--- a/head/sys/amd64/linux32/linux32_syscall.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_syscall.h Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call numbers.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/amd64/linux32/linux32_syscall.h 234360 2012-04-16 23:17:29Z jkim $
- * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 234359 2012-04-16 23:16:18Z jkim
+ * $FreeBSD: head/sys/amd64/linux32/linux32_syscall.h 236027 2012-05-25 21:52:57Z ed $
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
#define LINUX_SYS_exit 1
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_syscalls.c
--- a/head/sys/amd64/linux32/linux32_syscalls.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_syscalls.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call names.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/amd64/linux32/linux32_syscalls.c 234360 2012-04-16 23:17:29Z jkim $
- * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 234359 2012-04-16 23:16:18Z jkim
+ * $FreeBSD: head/sys/amd64/linux32/linux32_syscalls.c 236027 2012-05-25 21:52:57Z ed $
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
const char *linux_syscallnames[] = {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_sysent.c
--- a/head/sys/amd64/linux32/linux32_sysent.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_sysent.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call switch table.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/amd64/linux32/linux32_sysent.c 234360 2012-04-16 23:17:29Z jkim $
- * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 234359 2012-04-16 23:16:18Z jkim
+ * $FreeBSD: head/sys/amd64/linux32/linux32_sysent.c 236027 2012-05-25 21:52:57Z ed $
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
#include "opt_compat.h"
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/linux32_systrace_args.c
--- a/head/sys/amd64/linux32/linux32_systrace_args.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/linux32_systrace_args.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,7 +2,7 @@
* System call argument to DTrace register array converstion.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/amd64/linux32/linux32_systrace_args.c 234360 2012-04-16 23:17:29Z jkim $
+ * $FreeBSD: head/sys/amd64/linux32/linux32_systrace_args.c 236027 2012-05-25 21:52:57Z ed $
* This file is part of the DTrace syscall provider.
*/
@@ -94,8 +94,8 @@
case 11: {
struct linux_execve_args *p = params;
uarg[0] = (intptr_t) p->path; /* char * */
- uarg[1] = (intptr_t) p->argp; /* u_int32_t * */
- uarg[2] = (intptr_t) p->envp; /* u_int32_t * */
+ uarg[1] = (intptr_t) p->argp; /* uint32_t * */
+ uarg[2] = (intptr_t) p->envp; /* uint32_t * */
*n_args = 3;
break;
}
@@ -2401,10 +2401,10 @@
p = "char *";
break;
case 1:
- p = "u_int32_t *";
+ p = "uint32_t *";
break;
case 2:
- p = "u_int32_t *";
+ p = "uint32_t *";
break;
default:
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/amd64/linux32/syscalls.master
--- a/head/sys/amd64/linux32/syscalls.master Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/amd64/linux32/syscalls.master Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
- $FreeBSD: head/sys/amd64/linux32/syscalls.master 234359 2012-04-16 23:16:18Z jkim $
+ $FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed $
; @(#)syscalls.master 8.1 (Berkeley) 7/19/93
; System call name/number master file (or rather, slave, from LINUX).
@@ -54,8 +54,8 @@
l_int mode); }
9 AUE_LINK STD { int linux_link(char *path, char *to); }
10 AUE_UNLINK STD { int linux_unlink(char *path); }
-11 AUE_EXECVE STD { int linux_execve(char *path, u_int32_t *argp, \
- u_int32_t *envp); }
+11 AUE_EXECVE STD { int linux_execve(char *path, uint32_t *argp, \
+ uint32_t *envp); }
12 AUE_CHDIR STD { int linux_chdir(char *path); }
13 AUE_NULL STD { int linux_time(l_time_t *tm); }
14 AUE_MKNOD STD { int linux_mknod(char *path, l_int mode, \
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/cd9660/cd9660_node.c
--- a/head/sys/fs/cd9660/cd9660_node.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/cd9660/cd9660_node.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/fs/cd9660/cd9660_node.c 234607 2012-04-23 14:10:34Z trasz $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -65,7 +65,6 @@
} */ *ap;
{
struct vnode *vp = ap->a_vp;
- struct thread *td = ap->a_td;
struct iso_node *ip = VTOI(vp);
int error = 0;
@@ -74,7 +73,7 @@
* so that it can be reused immediately.
*/
if (ip->inode.iso_mode == 0)
- vrecycle(vp, td);
+ vrecycle(vp);
return error;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/cd9660/cd9660_vfsops.c
--- a/head/sys/fs/cd9660/cd9660_vfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/cd9660/cd9660_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/cd9660/cd9660_vfsops.c 232485 2012-03-04 09:48:58Z kevlo $");
+__FBSDID("$FreeBSD: head/sys/fs/cd9660/cd9660_vfsops.c 238697 2012-07-22 15:40:31Z kevlo $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -133,7 +133,7 @@
int error;
accmode_t accmode;
struct nameidata ndp;
- struct iso_mnt *imp = 0;
+ struct iso_mnt *imp = NULL;
td = curthread;
@@ -214,7 +214,7 @@
int iso_bsize;
int iso_blknum;
int joliet_level;
- struct iso_volume_descriptor *vdp = 0;
+ struct iso_volume_descriptor *vdp = NULL;
struct iso_primary_descriptor *pri = NULL;
struct iso_sierra_primary_descriptor *pri_sierra = NULL;
struct iso_supplementary_descriptor *sup = NULL;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/devfs/devfs_vnops.c
--- a/head/sys/fs/devfs/devfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/devfs/devfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
* @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95
* From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
*
- * $FreeBSD: head/sys/fs/devfs/devfs_vnops.c 231949 2012-02-21 01:05:12Z kib $
+ * $FreeBSD: head/sys/fs/devfs/devfs_vnops.c 238029 2012-07-02 21:01:03Z kib $
*/
/*
@@ -1170,18 +1170,14 @@
if (ioflag & O_DIRECT)
ioflag |= IO_DIRECT;
- if ((flags & FOF_OFFSET) == 0)
- uio->uio_offset = fp->f_offset;
-
+ foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
error = dsw->d_read(dev, uio, ioflag);
if (uio->uio_resid != resid || (error == 0 && resid != 0))
vfs_timestamp(&dev->si_atime);
td->td_fpop = fpop;
dev_relthread(dev, ref);
- if ((flags & FOF_OFFSET) == 0)
- fp->f_offset = uio->uio_offset;
- fp->f_nextoff = uio->uio_offset;
+ foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
return (error);
}
@@ -1648,8 +1644,7 @@
ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
if (ioflag & O_DIRECT)
ioflag |= IO_DIRECT;
- if ((flags & FOF_OFFSET) == 0)
- uio->uio_offset = fp->f_offset;
+ foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
resid = uio->uio_resid;
@@ -1661,9 +1656,7 @@
td->td_fpop = fpop;
dev_relthread(dev, ref);
- if ((flags & FOF_OFFSET) == 0)
- fp->f_offset = uio->uio_offset;
- fp->f_nextoff = uio->uio_offset;
+ foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ext2fs/ext2_inode.c
--- a/head/sys/fs/ext2fs/ext2_inode.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ext2fs/ext2_inode.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93
- * $FreeBSD: head/sys/fs/ext2fs/ext2_inode.c 228583 2011-12-16 15:47:43Z pfg $
+ * $FreeBSD: head/sys/fs/ext2fs/ext2_inode.c 234607 2012-04-23 14:10:34Z trasz $
*/
#include <sys/param.h>
@@ -249,7 +249,7 @@
bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof(newblks));
bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof(oldblks));
oip->i_size = osize;
- error = vtruncbuf(ovp, cred, td, length, (int)fs->e2fs_bsize);
+ error = vtruncbuf(ovp, cred, length, (int)fs->e2fs_bsize);
if (error && (allerror == 0))
allerror = error;
vnode_pager_setsize(ovp, length);
@@ -498,7 +498,7 @@
* so that it can be reused immediately.
*/
if (ip->i_mode == 0)
- vrecycle(vp, td);
+ vrecycle(vp);
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ext2fs/ext2_lookup.c
--- a/head/sys/fs/ext2fs/ext2_lookup.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ext2fs/ext2_lookup.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94
- * $FreeBSD: head/sys/fs/ext2fs/ext2_lookup.c 231949 2012-02-21 01:05:12Z kib $
+ * $FreeBSD: head/sys/fs/ext2fs/ext2_lookup.c 235508 2012-05-16 15:53:38Z pfg $
*/
#include <sys/param.h>
@@ -115,6 +115,8 @@
static int ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de,
int entryoffsetinblock);
+static int ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp,
+ struct componentname *cnp, ino_t *dd_ino);
/*
* Vnode op for reading directories.
@@ -285,7 +287,14 @@
struct componentname *a_cnp;
} */ *ap;
{
- struct vnode *vdp; /* vnode for directory being searched */
+
+ return (ext2_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
+}
+
+static int
+ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
+ ino_t *dd_ino)
+{
struct inode *dp; /* inode for directory being searched */
struct buf *bp; /* a buffer of directory entries */
struct ext2fs_direct_2 *ep; /* the current directory entry */
@@ -305,22 +314,22 @@
doff_t enduseful; /* pointer past last used dir slot */
u_long bmask; /* block offset mask */
int namlen, error;
- struct vnode **vpp = ap->a_vpp;
- struct componentname *cnp = ap->a_cnp;
struct ucred *cred = cnp->cn_cred;
int flags = cnp->cn_flags;
int nameiop = cnp->cn_nameiop;
- ino_t ino;
+ ino_t ino, ino1;
int ltype;
- int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->e2fs_bsize;
+ int DIRBLKSIZ = VTOI(vdp)->i_e2fs->e2fs_bsize;
+ if (vpp != NULL)
+ *vpp = NULL;
+
+ dp = VTOI(vdp);
+ bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
+restart:
bp = NULL;
slotoffset = -1;
- *vpp = NULL;
- vdp = ap->a_dvp;
- dp = VTOI(vdp);
- bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
/*
* We now have a segment name to search for, and a directory to search.
@@ -536,10 +545,12 @@
* Insert name into cache (as non-existent) if appropriate.
*/
if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
- cache_enter(vdp, *vpp, cnp);
+ cache_enter(vdp, NULL, cnp);
return (ENOENT);
found:
+ if (dd_ino != NULL)
+ *dd_ino = ino;
if (numdirpasses == 2)
nchstats.ncs_pass2++;
/*
@@ -582,6 +593,8 @@
dp->i_count = 0;
else
dp->i_count = dp->i_offset - prevoff;
+ if (dd_ino != NULL)
+ return (0);
if (dp->i_number == ino) {
VREF(vdp);
*vpp = vdp;
@@ -622,6 +635,8 @@
*/
if (dp->i_number == ino)
return (EISDIR);
+ if (dd_ino != NULL)
+ return (0);
if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE,
&tdp)) != 0)
return (error);
@@ -629,6 +644,8 @@
cnp->cn_flags |= SAVENAME;
return (0);
}
+ if (dd_ino != NULL)
+ return (0);
/*
* Step through the translation in the name. We do not `vput' the
@@ -655,8 +672,27 @@
VOP_UNLOCK(pdp, 0); /* race to get the inode */
error = VFS_VGET(vdp->v_mount, ino, cnp->cn_lkflags, &tdp);
vn_lock(pdp, ltype | LK_RETRY);
- if (error != 0)
+ if (pdp->v_iflag & VI_DOOMED) {
+ if (error == 0)
+ vput(tdp);
+ error = ENOENT;
+ }
+ if (error)
return (error);
+ /*
+ * Recheck that ".." entry in the vdp directory points
+ * to the inode we looked up before vdp lock was
+ * dropped.
+ */
+ error = ext2_lookup_ino(pdp, NULL, cnp, &ino1);
+ if (error) {
+ vput(tdp);
+ return (error);
+ }
+ if (ino1 != ino) {
+ vput(tdp);
+ goto restart;
+ }
*vpp = tdp;
} else if (dp->i_number == ino) {
VREF(vdp); /* we want ourself, ie "." */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ext2fs/ext2_vfsops.c
--- a/head/sys/fs/ext2fs/ext2_vfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ext2fs/ext2_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
- * $FreeBSD: head/sys/fs/ext2fs/ext2_vfsops.c 234386 2012-04-17 16:28:22Z mckusick $
+ * $FreeBSD: head/sys/fs/ext2fs/ext2_vfsops.c 238697 2012-07-22 15:40:31Z kevlo $
*/
#include <sys/param.h>
@@ -112,7 +112,7 @@
struct vfsoptlist *opts;
struct vnode *devvp;
struct thread *td;
- struct ext2mount *ump = 0;
+ struct ext2mount *ump = NULL;
struct m_ext2fs *fs;
struct nameidata nd, *ndp = &nd;
accmode_t accmode;
@@ -767,7 +767,7 @@
ump = VFSTOEXT2(mp);
fs = ump->um_e2fs;
if (fs->e2fs->e2fs_magic != E2FS_MAGIC)
- panic("ext2fs_statvfs");
+ panic("ext2fs_statfs");
/*
* Compute the overhead (FS structures)
@@ -830,7 +830,6 @@
/*
* Write back each (modified) inode.
*/
- MNT_ILOCK(mp);
loop:
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
if (vp->v_type == VNON) {
@@ -847,7 +846,6 @@
}
error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
if (error) {
- MNT_ILOCK(mp);
if (error == ENOENT) {
MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
goto loop;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ext2fs/ext2_vnops.c
--- a/head/sys/fs/ext2fs/ext2_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ext2fs/ext2_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
*
* @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94
* @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
- * $FreeBSD: head/sys/fs/ext2fs/ext2_vnops.c 234203 2012-04-13 05:48:31Z jh $
+ * $FreeBSD: head/sys/fs/ext2fs/ext2_vnops.c 235508 2012-05-16 15:53:38Z pfg $
*/
#include "opt_suiddir.h"
@@ -1336,7 +1336,11 @@
error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
cnp->cn_thread);
cache_purge(ITOV(ip));
- vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+ VOP_UNLOCK(vp, 0);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ }
out:
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/hpfs/hpfs_vnops.c
--- a/head/sys/fs/hpfs/hpfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/hpfs/hpfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/fs/hpfs/hpfs_vnops.c 235984 2012-05-25 09:16:59Z gleb $
*/
#include <sys/param.h>
@@ -528,7 +528,7 @@
}
if (vap->va_size < hp->h_fn.fn_size) {
- error = vtruncbuf(vp, cred, td, vap->va_size, DEV_BSIZE);
+ error = vtruncbuf(vp, cred, vap->va_size, DEV_BSIZE);
if (error)
return (error);
error = hpfs_truncate(hp, vap->va_size);
@@ -576,7 +576,7 @@
}
if (hp->h_flag & H_INVAL) {
- vrecycle(vp, ap->a_td);
+ vrecycle(vp);
return (0);
}
@@ -797,10 +797,21 @@
}
-static struct dirent hpfs_de_dot =
- { 0, sizeof(struct dirent), DT_DIR, 1, "." };
-static struct dirent hpfs_de_dotdot =
- { 0, sizeof(struct dirent), DT_DIR, 2, ".." };
+static struct dirent hpfs_de_dot = {
+ .d_fileno = 0,
+ .d_reclen = sizeof(struct dirent),
+ .d_type = DT_DIR,
+ .d_namlen = 1,
+ .d_name = "."
+};
+static struct dirent hpfs_de_dotdot = {
+ .d_fileno = 0,
+ .d_reclen = sizeof(struct dirent),
+ .d_type = DT_DIR,
+ .d_namlen = 2,
+ .d_name = ".."
+};
+
int
hpfs_readdir(ap)
struct vop_readdir_args /* {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/msdosfs/denode.h
--- a/head/sys/fs/msdosfs/denode.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/msdosfs/denode.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD$ */
+/* $FreeBSD: head/sys/fs/msdosfs/denode.h 234605 2012-04-23 13:21:28Z trasz $ */
/* $NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $ */
/*-
@@ -276,6 +276,6 @@
int createde(struct denode *dep, struct denode *ddep, struct denode **depp, struct componentname *cnp);
int deupdat(struct denode *dep, int waitfor);
int removede(struct denode *pdep, struct denode *dep);
-int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred, struct thread *td);
+int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred);
int doscheckpath( struct denode *source, struct denode *target);
#endif /* _KERNEL */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/msdosfs/msdosfs_denode.c
--- a/head/sys/fs/msdosfs/msdosfs_denode.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/msdosfs/msdosfs_denode.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_denode.c 231998 2012-02-22 13:01:17Z kib $ */
+/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_denode.c 234607 2012-04-23 14:10:34Z trasz $ */
/* $NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $ */
/*-
@@ -326,12 +326,11 @@
* Truncate the file described by dep to the length specified by length.
*/
int
-detrunc(dep, length, flags, cred, td)
+detrunc(dep, length, flags, cred)
struct denode *dep;
u_long length;
int flags;
struct ucred *cred;
- struct thread *td;
{
int error;
int allerror;
@@ -426,7 +425,7 @@
dep->de_FileSize = length;
if (!isadir)
dep->de_flag |= DE_UPDATE | DE_MODIFIED;
- allerror = vtruncbuf(DETOV(dep), cred, td, length, pmp->pm_bpcluster);
+ allerror = vtruncbuf(DETOV(dep), cred, length, pmp->pm_bpcluster);
#ifdef MSDOSFS_DEBUG
if (allerror)
printf("detrunc(): vtruncbuf error %d\n", allerror);
@@ -504,7 +503,7 @@
error = extendfile(dep, count, NULL, NULL, DE_CLEAR);
if (error) {
/* truncate the added clusters away again */
- (void) detrunc(dep, dep->de_FileSize, 0, cred, NULL);
+ (void) detrunc(dep, dep->de_FileSize, 0, cred);
return (error);
}
}
@@ -584,7 +583,6 @@
{
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
- struct thread *td = ap->a_td;
int error = 0;
#ifdef MSDOSFS_DEBUG
@@ -607,7 +605,7 @@
dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY);
#endif
if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
- error = detrunc(dep, (u_long) 0, 0, NOCRED, td);
+ error = detrunc(dep, (u_long) 0, 0, NOCRED);
dep->de_flag |= DE_UPDATE;
dep->de_Name[0] = SLOT_DELETED;
}
@@ -623,6 +621,6 @@
vrefcnt(vp), dep->de_Name[0]);
#endif
if (dep->de_Name[0] == SLOT_DELETED || dep->de_Name[0] == SLOT_EMPTY)
- vrecycle(vp, td);
+ vrecycle(vp);
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/msdosfs/msdosfs_lookup.c
--- a/head/sys/fs/msdosfs/msdosfs_lookup.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/msdosfs/msdosfs_lookup.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_lookup.c 231998 2012-02-22 13:01:17Z kib $ */
+/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_lookup.c 238697 2012-07-22 15:40:31Z kevlo $ */
/* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */
/*-
@@ -108,7 +108,7 @@
struct denode *dp;
struct denode *tdp;
struct msdosfsmount *pmp;
- struct buf *bp = 0;
+ struct buf *bp = NULL;
struct direntry *dep = NULL;
u_char dosfilename[12];
int flags = cnp->cn_flags;
@@ -649,7 +649,7 @@
dirclust = de_clcount(pmp, diroffset);
error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR);
if (error) {
- (void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED, NULL);
+ (void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED);
return error;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/msdosfs/msdosfs_vnops.c
--- a/head/sys/fs/msdosfs/msdosfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/msdosfs/msdosfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_vnops.c 231998 2012-02-22 13:01:17Z kib $ */
+/* $FreeBSD: head/sys/fs/msdosfs/msdosfs_vnops.c 234605 2012-04-23 13:21:28Z trasz $ */
/* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */
/*-
@@ -476,7 +476,7 @@
*/
break;
}
- error = detrunc(dep, vap->va_size, 0, cred, td);
+ error = detrunc(dep, vap->va_size, 0, cred);
if (error)
return error;
}
@@ -835,11 +835,11 @@
errexit:
if (error) {
if (ioflag & IO_UNIT) {
- detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL);
+ detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
} else {
- detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+ detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED);
if (uio->uio_resid != resid)
error = 0;
}
@@ -1429,7 +1429,6 @@
struct vnode *dvp = ap->a_dvp;
struct componentname *cnp = ap->a_cnp;
struct denode *ip, *dp;
- struct thread *td = cnp->cn_thread;
int error;
ip = VTODE(vp);
@@ -1467,7 +1466,7 @@
/*
* Truncate the directory that is being deleted.
*/
- error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, td);
+ error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred);
cache_purge(vp);
out:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/bmap.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/bmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,621 @@
+/*-
+ * Copyright (c) 2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/bmap.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/ktr.h>
+#include <sys/kdb.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+#include "bmap.h"
+
+static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t,
+ struct nandfs_indir *, int *);
+
+int
+bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk)
+{
+ struct nandfs_inode *ip;
+ struct nandfs_indir a[NIADDR + 1], *ap;
+ nandfs_daddr_t daddr;
+ struct buf *bp;
+ int error;
+ int num, *nump;
+
+ DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk));
+ ip = &node->nn_inode;
+
+ ap = a;
+ nump = #
+
+ error = bmap_getlbns(node, lblk, ap, nump);
+ if (error)
+ return (error);
+
+ if (num == 0) {
+ *vblk = ip->i_db[lblk];
+ return (0);
+ }
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__,
+ node, lblk, ap->in_off));
+ daddr = ip->i_ib[ap->in_off];
+ for (bp = NULL, ++ap; --num; ap++) {
+ if (daddr == 0) {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with "
+ "vblk 0\n", __func__, node, lblk));
+ *vblk = 0;
+ return (0);
+ }
+ if (ap->in_lbn == lblk) {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx "
+ "returning address of indirect block (%jx)\n",
+ __func__, node, lblk, ap->in_lbn, daddr));
+ *vblk = daddr;
+ return (0);
+ }
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block "
+ "ap->in_lbn=%jx\n", __func__, node, lblk, ap->in_lbn));
+
+ error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
+ brelse(bp);
+ }
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__,
+ node, lblk, daddr));
+ *vblk = daddr;
+
+ return (0);
+}
+
+int
+bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force)
+{
+ struct nandfs_indir a[NIADDR+1], *ap;
+#ifdef DEBUG
+ nandfs_daddr_t daddr;
+#endif
+ struct buf *bp;
+ int error;
+ int num, *nump;
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk));
+
+ ap = a;
+ nump = #
+
+ error = bmap_getlbns(node, lblk, ap, nump);
+ if (error)
+ return (error);
+
+ /*
+ * Direct block, nothing to do
+ */
+ if (num == 0)
+ return (0);
+
+ DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node));
+
+ for (bp = NULL, ++ap; --num; ap++) {
+ error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+#ifdef DEBUG
+ daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
+ MPASS(daddr != 0 || node->nn_ino == 3);
+#endif
+
+ error = nandfs_dirty_buf_meta(bp, force);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+int
+bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
+ nandfs_daddr_t vblk)
+{
+ struct nandfs_inode *ip;
+ struct nandfs_indir a[NIADDR+1], *ap;
+ struct buf *bp;
+ nandfs_daddr_t daddr;
+ int error;
+ int num, *nump, i;
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk,
+ vblk));
+
+ ip = &node->nn_inode;
+
+ ap = a;
+ nump = #
+
+ error = bmap_getlbns(node, lblk, ap, nump);
+ if (error)
+ return (error);
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__,
+ node, lblk, vblk, num));
+
+ if (num == 0) {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__,
+ node, lblk));
+ ip->i_db[lblk] = vblk;
+ return (0);
+ }
+
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n",
+ __func__, node, lblk, ap->in_off));
+
+ if (num == 1) {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting "
+ "%jx as vblk for indirect block %d\n", __func__, node,
+ lblk, vblk, ap->in_off));
+ ip->i_ib[ap->in_off] = vblk;
+ return (0);
+ }
+
+ bp = NULL;
+ daddr = ip->i_ib[a[0].in_off];
+ for (i = 1; i < num; i++) {
+ if (bp)
+ brelse(bp);
+ if (daddr == 0) {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create "
+ "block %jx %d\n", __func__, node, lblk, vblk,
+ a[i].in_lbn, a[i].in_off));
+ error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED,
+ 0, &bp);
+ if (error)
+ return (error);
+ } else {
+ DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read "
+ "block %jx %d\n", __func__, node, daddr, vblk,
+ a[i].in_lbn, a[i].in_off));
+ error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off];
+ }
+ i--;
+
+ DPRINTF(BMAP,
+ ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at "
+ "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off,
+ daddr));
+
+ if (!bp) {
+ nandfs_error("%s: cannot find indirect block\n", __func__);
+ return (-1);
+ }
+ ((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk;
+
+ error = nandfs_dirty_buf_meta(bp, 0);
+ if (error) {
+ nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp);
+ return (error);
+ }
+ DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__,
+ node, lblk, vblk));
+
+ return (error);
+}
+
+CTASSERT(NIADDR <= 3);
+#define SINGLE 0 /* index of single indirect block */
+#define DOUBLE 1 /* index of double indirect block */
+#define TRIPLE 2 /* index of triple indirect block */
+
+static __inline nandfs_lbn_t
+lbn_offset(struct nandfs_device *fsdev, int level)
+{
+ nandfs_lbn_t res;
+
+ for (res = 1; level > 0; level--)
+ res *= MNINDIR(fsdev);
+ return (res);
+}
+
+static nandfs_lbn_t
+blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip)
+{
+ nandfs_lbn_t blocks;
+
+ for (blocks = 1; level >= SINGLE; level--, nip++) {
+ MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev));
+ blocks += nip->in_off * lbn_offset(fsdev, level);
+ }
+
+ return (blocks);
+}
+
+static int
+bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left,
+ int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp,
+ nandfs_daddr_t *copy)
+{
+ struct buf *bp;
+ nandfs_lbn_t i, lbn, nlbn, factor, tosub;
+ struct nandfs_device *fsdev;
+ int error, lcleaned, modified;
+
+ DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__,
+ node, level, *left));
+
+ fsdev = node->nn_nandfsdev;
+
+ MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev));
+
+ factor = lbn_offset(fsdev, level);
+ lbn = ap->in_lbn;
+
+ error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ bcopy(bp->b_data, copy, fsdev->nd_blocksize);
+ bqrelse(bp);
+
+ modified = 0;
+
+ i = ap->in_off;
+
+ if (ap != fp)
+ ap++;
+ for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--,
+ nlbn += factor) {
+ lcleaned = 0;
+
+ DPRINTF(BMAP,
+ ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n",
+ __func__, node, i, nlbn, *left, ap, copy[i]));
+
+ if (copy[i] == 0) {
+ tosub = blocks_inside(fsdev, level - 1, ap);
+ if (tosub > *left)
+ tosub = 0;
+
+ *left -= tosub;
+ } else {
+ if (level > SINGLE) {
+ if (ap == fp)
+ ap->in_lbn = nlbn;
+
+ error = bmap_truncate_indirect(node, level - 1,
+ left, &lcleaned, ap, fp,
+ copy + MNINDIR(fsdev));
+ if (error)
+ return (error);
+ } else {
+ error = nandfs_bdestroy(node, copy[i]);
+ if (error)
+ return (error);
+ lcleaned = 1;
+ *left -= 1;
+ }
+ }
+
+ if (lcleaned) {
+ if (level > SINGLE) {
+ error = nandfs_vblock_end(fsdev, copy[i]);
+ if (error)
+ return (error);
+ }
+ copy[i] = 0;
+ modified++;
+ }
+
+ ap = fp;
+ }
+
+ if (i == -1)
+ *cleaned = 1;
+
+ error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ if (modified)
+ bcopy(copy, bp->b_data, fsdev->nd_blocksize);
+
+ error = nandfs_dirty_buf_meta(bp, 0);
+ if (error)
+ return (error);
+
+ return (error);
+}
+
+int
+bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk,
+ nandfs_lbn_t todo)
+{
+ struct nandfs_inode *ip;
+ struct nandfs_indir a[NIADDR + 1], f[NIADDR], *ap;
+ nandfs_daddr_t indir_lbn[NIADDR];
+ nandfs_daddr_t *copy;
+ int error, level;
+ nandfs_lbn_t left, tosub;
+ struct nandfs_device *fsdev;
+ int cleaned, i;
+ int num, *nump;
+
+ DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__,
+ node, lastblk, todo));
+
+ ip = &node->nn_inode;
+ fsdev = node->nn_nandfsdev;
+
+ ap = a;
+ nump = #
+
+ error = bmap_getlbns(node, lastblk, ap, nump);
+ if (error)
+ return (error);
+
+ indir_lbn[SINGLE] = -NDADDR;
+ indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1;
+ indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev)
+ * MNINDIR(fsdev) - 1;
+
+ for (i = 0; i < NIADDR; i++) {
+ f[i].in_off = MNINDIR(fsdev) - 1;
+ f[i].in_lbn = 0xdeadbeef;
+ }
+
+ left = todo;
+
+#ifdef DEBUG
+ a[num].in_off = -1;
+#endif
+
+ ap++;
+ num -= 2;
+
+ if (num < 0)
+ goto direct;
+
+ copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1),
+ M_NANDFSTEMP, M_WAITOK);
+
+ for (level = num; level >= SINGLE && left > 0; level--) {
+ cleaned = 0;
+
+ if (ip->i_ib[level] == 0) {
+ tosub = blocks_inside(fsdev, level, ap);
+ if (tosub > left)
+ left = 0;
+ else
+ left -= tosub;
+ } else {
+ if (ap == f)
+ ap->in_lbn = indir_lbn[level];
+ error = bmap_truncate_indirect(node, level, &left,
+ &cleaned, ap, f, copy);
+ if (error) {
+ nandfs_error("%s: error %d when truncate "
+ "at level %d\n", __func__, error, level);
+ return (error);
+ }
+ }
+
+ if (cleaned) {
+ nandfs_vblock_end(fsdev, ip->i_ib[level]);
+ ip->i_ib[level] = 0;
+ }
+
+ ap = f;
+ }
+
+ free(copy, M_NANDFSTEMP);
+
+direct:
+ if (num < 0)
+ i = lastblk;
+ else
+ i = NDADDR - 1;
+
+ for (; i >= 0 && left > 0; i--) {
+ if (ip->i_db[i] != 0) {
+ error = nandfs_bdestroy(node, ip->i_db[i]);
+ if (error) {
+ nandfs_error("%s: cannot destroy "
+ "block %jx, error %d\n", __func__,
+ (uintmax_t)ip->i_db[i], error);
+ return (error);
+ }
+ ip->i_db[i] = 0;
+ }
+
+ left--;
+ }
+
+ KASSERT(left == 0,
+ ("truncated wrong number of blocks (%jd should be 0)", left));
+
+ return (error);
+}
+
+nandfs_lbn_t
+get_maxfilesize(struct nandfs_device *fsdev)
+{
+ struct nandfs_indir f[NIADDR];
+ nandfs_lbn_t max;
+ int i;
+
+ max = NDADDR;
+
+ for (i = 0; i < NIADDR; i++) {
+ f[i].in_off = MNINDIR(fsdev) - 1;
+ max += blocks_inside(fsdev, i, f);
+ }
+
+ max *= fsdev->nd_blocksize;
+
+ return (max);
+}
+
+/*
+ * This is ufs_getlbns with minor modifications.
+ */
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block. The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * Note, the logical block number of the inode single/double/triple indirect
+ * block appears twice in the array, once with the offset into the i_ib and
+ * once with the offset into the page itself.
+ */
+static int
+bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump)
+{
+ nandfs_daddr_t blockcnt;
+ nandfs_lbn_t metalbn, realbn;
+ struct nandfs_device *fsdev;
+ int i, numlevels, off;
+
+ fsdev = node->nn_nandfsdev;
+
+ DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__,
+ node, bn, MNINDIR(fsdev)));
+
+ *nump = 0;
+ numlevels = 0;
+ realbn = bn;
+
+ if (bn < 0)
+ bn = -bn;
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the previous level of indirection, and NIADDR - i is the number
+ * of levels of indirection needed to locate the requested block.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__,
+ blockcnt, i, bn));
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(fsdev);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i;
+
+ DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__,
+ metalbn, off));
+
+ ap++;
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ blockcnt /= MNINDIR(fsdev);
+ off = (bn / blockcnt) % MNINDIR(fsdev);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+
+ DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__,
+ ap->in_lbn, ap->in_off));
+ ++ap;
+
+ metalbn -= -1 + off * blockcnt;
+ }
+ if (nump)
+ *nump = numlevels;
+
+ DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels));
+
+ return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/bmap.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/bmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/fs/nandfs/bmap.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _BMAP_H
+#define _BMAP_H
+
+#include "nandfs_fs.h"
+
+int bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *);
+int bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t);
+int bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, nandfs_lbn_t);
+int bmap_dirty_meta(struct nandfs_node *, nandfs_lbn_t, int);
+
+nandfs_lbn_t get_maxfilesize(struct nandfs_device *);
+
+#endif /* _BMAP_H */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,310 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD: head/sys/fs/nandfs/nandfs.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _FS_NANDFS_NANDFS_H_
+#define _FS_NANDFS_NANDFS_H_
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <sys/mutex.h>
+
+#include <sys/disk.h>
+#include <sys/kthread.h>
+#include "nandfs_fs.h"
+
+MALLOC_DECLARE(M_NANDFSTEMP);
+
+/* Debug categories */
+#define NANDFS_DEBUG_VOLUMES 0x000001
+#define NANDFS_DEBUG_BLOCK 0x000004
+#define NANDFS_DEBUG_LOCKING 0x000008
+#define NANDFS_DEBUG_NODE 0x000010
+#define NANDFS_DEBUG_LOOKUP 0x000020
+#define NANDFS_DEBUG_READDIR 0x000040
+#define NANDFS_DEBUG_TRANSLATE 0x000080
+#define NANDFS_DEBUG_STRATEGY 0x000100
+#define NANDFS_DEBUG_READ 0x000200
+#define NANDFS_DEBUG_WRITE 0x000400
+#define NANDFS_DEBUG_IFILE 0x000800
+#define NANDFS_DEBUG_ATTR 0x001000
+#define NANDFS_DEBUG_EXTATTR 0x002000
+#define NANDFS_DEBUG_ALLOC 0x004000
+#define NANDFS_DEBUG_CPFILE 0x008000
+#define NANDFS_DEBUG_DIRHASH 0x010000
+#define NANDFS_DEBUG_NOTIMPL 0x020000
+#define NANDFS_DEBUG_SHEDULE 0x040000
+#define NANDFS_DEBUG_SEG 0x080000
+#define NANDFS_DEBUG_SYNC 0x100000
+#define NANDFS_DEBUG_PARANOIA 0x200000
+#define NANDFS_DEBUG_VNCALL 0x400000
+#define NANDFS_DEBUG_BUF 0x1000000
+#define NANDFS_DEBUG_BMAP 0x2000000
+#define NANDFS_DEBUG_DAT 0x4000000
+#define NANDFS_DEBUG_GENERIC 0x8000000
+#define NANDFS_DEBUG_CLEAN 0x10000000
+
+extern int nandfs_verbose;
+
+#define DPRINTF(name, arg) { \
+ if (nandfs_verbose & NANDFS_DEBUG_##name) {\
+ printf arg;\
+ };\
+ }
+#define DPRINTFIF(name, cond, arg) { \
+ if (nandfs_verbose & NANDFS_DEBUG_##name) { \
+ if (cond) printf arg;\
+ };\
+ }
+
+#define VFSTONANDFS(mp) ((struct nandfsmount *)((mp)->mnt_data))
+#define VTON(vp) ((struct nandfs_node *)(vp)->v_data)
+#define NTOV(xp) ((xp)->nn_vnode)
+
+int nandfs_init(struct vfsconf *);
+int nandfs_uninit(struct vfsconf *);
+
+extern struct vop_vector nandfs_vnodeops;
+extern struct vop_vector nandfs_system_vnodeops;
+
+struct nandfs_node;
+
+/* Structure and derivatives */
+struct nandfs_mdt {
+ uint32_t entries_per_block;
+ uint32_t entries_per_group;
+ uint32_t blocks_per_group;
+ uint32_t groups_per_desc_block; /* desc is super group */
+ uint32_t blocks_per_desc_block; /* desc is super group */
+};
+
+struct nandfs_segment {
+ LIST_ENTRY(nandfs_segment) seg_link;
+
+ struct nandfs_device *fsdev;
+
+ TAILQ_HEAD(, buf) segsum;
+ TAILQ_HEAD(, buf) data;
+
+ uint64_t seg_num;
+ uint64_t seg_next;
+ uint64_t start_block;
+ uint32_t num_blocks;
+
+ uint32_t nblocks;
+ uint32_t nbinfos;
+ uint32_t segsum_blocks;
+ uint32_t segsum_bytes;
+ uint32_t bytes_left;
+ char *current_off;
+};
+
+struct nandfs_seginfo {
+ LIST_HEAD( ,nandfs_segment) seg_list;
+ struct nandfs_segment *curseg;
+ struct nandfs_device *fsdev;
+ uint32_t blocks;
+ uint8_t reiterate;
+};
+
+#define NANDFS_FSSTOR_FAILED 1
+struct nandfs_fsarea {
+ int offset;
+ int flags;
+ int last_used;
+};
+
+extern int nandfs_cleaner_enable;
+extern int nandfs_cleaner_interval;
+extern int nandfs_cleaner_segments;
+
+struct nandfs_device {
+ struct vnode *nd_devvp;
+ struct g_consumer *nd_gconsumer;
+
+ struct thread *nd_syncer;
+ struct thread *nd_cleaner;
+ int nd_syncer_exit;
+ int nd_cleaner_exit;
+
+ int nd_is_nand;
+
+ struct nandfs_fsarea nd_fsarea[NANDFS_NFSAREAS];
+ int nd_last_fsarea;
+
+ STAILQ_HEAD(nandfs_mnts, nandfsmount) nd_mounts;
+ SLIST_ENTRY(nandfs_device) nd_next_device;
+
+ /* FS structures */
+ struct nandfs_fsdata nd_fsdata;
+ struct nandfs_super_block nd_super;
+ struct nandfs_segment_summary nd_last_segsum;
+ struct nandfs_super_root nd_super_root;
+ struct nandfs_node *nd_dat_node;
+ struct nandfs_node *nd_cp_node;
+ struct nandfs_node *nd_su_node;
+ struct nandfs_node *nd_gc_node;
+
+ struct nandfs_mdt nd_dat_mdt;
+ struct nandfs_mdt nd_ifile_mdt;
+
+ struct timespec nd_ts;
+
+ /* Synchronization */
+ struct mtx nd_mutex;
+ struct mtx nd_sync_mtx;
+ struct cv nd_sync_cv;
+ struct mtx nd_clean_mtx;
+ struct cv nd_clean_cv;
+ struct lock nd_seg_const;
+
+ struct nandfs_seginfo *nd_seginfo;
+
+ /* FS geometry */
+ uint64_t nd_devsize;
+ uint64_t nd_maxfilesize;
+ uint32_t nd_blocksize;
+ uint32_t nd_erasesize;
+
+ uint32_t nd_devblocksize;
+
+ /* Segment usage */
+ uint64_t nd_clean_segs;
+ uint64_t *nd_free_base;
+ uint64_t nd_free_count;
+ uint64_t nd_dirty_bufs;
+
+ /* Running values */
+ uint64_t nd_seg_sequence;
+ uint64_t nd_seg_num;
+ uint64_t nd_next_seg_num;
+ uint64_t nd_last_pseg;
+ uint64_t nd_last_cno;
+ uint64_t nd_last_ino;
+ uint64_t nd_fakevblk;
+
+ int nd_mount_state;
+ int nd_refcnt;
+ int nd_syncing;
+ int nd_cleaning;
+};
+
+extern SLIST_HEAD(_nandfs_devices, nandfs_device) nandfs_devices;
+
+#define NANDFS_FORCE_SYNCER 0x1
+#define NANDFS_UMOUNT 0x2
+
+#define SYNCER_UMOUNT 0x0
+#define SYNCER_VFS_SYNC 0x1
+#define SYNCER_BDFLUSH 0x2
+#define SYNCER_FFORCE 0x3
+#define SYNCER_FSYNC 0x4
+#define SYNCER_ROUPD 0x5
+
+static __inline int
+nandfs_writelockflags(struct nandfs_device *fsdev, int flags)
+{
+ int error = 0;
+
+ if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE)
+ error = lockmgr(&fsdev->nd_seg_const, flags | LK_SHARED, NULL);
+
+ return (error);
+}
+
+static __inline void
+nandfs_writeunlock(struct nandfs_device *fsdev)
+{
+
+ if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE)
+ lockmgr(&(fsdev)->nd_seg_const, LK_RELEASE, NULL);
+}
+
+#define NANDFS_WRITELOCKFLAGS(fsdev, flags) nandfs_writelockflags(fsdev, flags)
+
+#define NANDFS_WRITELOCK(fsdev) NANDFS_WRITELOCKFLAGS(fsdev, 0)
+
+#define NANDFS_WRITEUNLOCK(fsdev) nandfs_writeunlock(fsdev)
+
+#define NANDFS_WRITEASSERT(fsdev) lockmgr_assert(&(fsdev)->nd_seg_const, KA_LOCKED)
+
+/* Specific mountpoint; head or a checkpoint/snapshot */
+struct nandfsmount {
+ STAILQ_ENTRY(nandfsmount) nm_next_mount;
+
+ struct mount *nm_vfs_mountp;
+ struct nandfs_device *nm_nandfsdev;
+ struct nandfs_args nm_mount_args;
+ struct nandfs_node *nm_ifile_node;
+
+ uint8_t nm_flags;
+ int8_t nm_ronly;
+};
+
+struct nandfs_node {
+ struct vnode *nn_vnode;
+ struct nandfsmount *nn_nmp;
+ struct nandfs_device *nn_nandfsdev;
+ struct lockf *nn_lockf;
+
+ uint64_t nn_ino;
+ struct nandfs_inode nn_inode;
+
+ uint64_t nn_diroff;
+ uint32_t nn_flags;
+};
+
+#define IN_ACCESS 0x0001 /* Inode access time update request */
+#define IN_CHANGE 0x0002 /* Inode change time update request */
+#define IN_UPDATE 0x0004 /* Inode was written to; update mtime*/
+#define IN_MODIFIED 0x0008 /* node has been modified */
+#define IN_RENAME 0x0010 /* node is being renamed. */
+
+/* File permissions. */
+#define IEXEC 0000100 /* Executable. */
+#define IWRITE 0000200 /* Writeable. */
+#define IREAD 0000400 /* Readable. */
+#define ISVTX 0001000 /* Sticky bit. */
+#define ISGID 0002000 /* Set-gid. */
+#define ISUID 0004000 /* Set-uid. */
+
+#define PRINT_NODE_FLAGS \
+ "\10\1IN_ACCESS\2IN_CHANGE\3IN_UPDATE\4IN_MODIFIED\5IN_RENAME"
+
+#define NANDFS_GATHER(x) ((x)->b_flags |= B_00800000)
+#define NANDFS_UNGATHER(x) ((x)->b_flags &= ~B_00800000)
+#define NANDFS_ISGATHERED(x) ((x)->b_flags & B_00800000)
+
+#endif /* !_FS_NANDFS_NANDFS_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_alloc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_alloc.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,364 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_alloc.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+static void
+nandfs_get_desc_block_nr(struct nandfs_mdt *mdt, uint64_t desc,
+ uint64_t *desc_block)
+{
+
+ *desc_block = desc * mdt->blocks_per_desc_block;
+}
+
+static void
+nandfs_get_group_block_nr(struct nandfs_mdt *mdt, uint64_t group,
+ uint64_t *group_block)
+{
+ uint64_t desc, group_off;
+
+ desc = group / mdt->groups_per_desc_block;
+ group_off = group % mdt->groups_per_desc_block;
+ *group_block = desc * mdt->blocks_per_desc_block +
+ 1 + group_off * mdt->blocks_per_group;
+}
+
+static void
+init_desc_block(struct nandfs_mdt *mdt, uint8_t *block_data)
+{
+ struct nandfs_block_group_desc *desc;
+ uint32_t i;
+
+ desc = (struct nandfs_block_group_desc *) block_data;
+ for (i = 0; i < mdt->groups_per_desc_block; i++)
+ desc[i].bg_nfrees = mdt->entries_per_group;
+}
+
+int
+nandfs_find_free_entry(struct nandfs_mdt *mdt, struct nandfs_node *node,
+ struct nandfs_alloc_request *req)
+{
+ nandfs_daddr_t desc, group, maxgroup, maxdesc, pos = 0;
+ nandfs_daddr_t start_group, start_desc;
+ nandfs_daddr_t desc_block, group_block;
+ nandfs_daddr_t file_blocks;
+ struct nandfs_block_group_desc *descriptors;
+ struct buf *bp, *bp2;
+ uint32_t *mask, i, mcount, msize;
+ int error;
+
+ file_blocks = node->nn_inode.i_blocks;
+ maxgroup = 0x100000000ull / mdt->entries_per_group;
+ maxdesc = maxgroup / mdt->groups_per_desc_block;
+ start_group = req->entrynum / mdt->entries_per_group;
+ start_desc = start_group / mdt->groups_per_desc_block;
+
+ bp = bp2 = NULL;
+restart:
+ for (desc = start_desc; desc < maxdesc; desc++) {
+ nandfs_get_desc_block_nr(mdt, desc, &desc_block);
+
+ if (bp)
+ brelse(bp);
+ if (desc_block < file_blocks) {
+ error = nandfs_bread(node, desc_block, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ } else {
+ error = nandfs_bcreate(node, desc_block, NOCRED, 0,
+ &bp);
+ if (error)
+ return (error);
+ file_blocks++;
+ init_desc_block(mdt, bp->b_data);
+ }
+
+ descriptors = (struct nandfs_block_group_desc *) bp->b_data;
+ for (group = start_group; group < mdt->groups_per_desc_block;
+ group++) {
+ if (descriptors[group].bg_nfrees > 0) {
+ nandfs_get_group_block_nr(mdt, group,
+ &group_block);
+
+ if (bp2)
+ brelse(bp2);
+ if (group_block < file_blocks) {
+ error = nandfs_bread(node, group_block,
+ NOCRED, 0, &bp2);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ } else {
+ error = nandfs_bcreate(node,
+ group_block, NOCRED, 0, &bp2);
+ if (error)
+ return (error);
+ file_blocks++;
+ }
+ mask = (uint32_t *)bp2->b_data;
+ msize = (sizeof(uint32_t) * __CHAR_BIT);
+ mcount = mdt->entries_per_group / msize;
+ for (i = 0; i < mcount; i++) {
+ if (mask[i] == UINT32_MAX)
+ continue;
+
+ pos = ffs(~mask[i]) - 1;
+ pos += (msize * i);
+ pos += (group * mdt->entries_per_group);
+ pos += desc * group *
+ mdt->groups_per_desc_block *
+ mdt->entries_per_group;
+ goto found;
+ }
+ }
+ }
+ start_group = 0;
+ }
+
+ if (start_desc != 0) {
+ maxdesc = start_desc;
+ start_desc = 0;
+ req->entrynum = 0;
+ goto restart;
+ }
+
+ return (ENOENT);
+
+found:
+ req->entrynum = pos;
+ req->bp_desc = bp;
+ req->bp_bitmap = bp2;
+ DPRINTF(ALLOC, ("%s: desc: %p bitmap: %p entry: %#jx\n",
+ __func__, req->bp_desc, req->bp_bitmap, (uintmax_t)pos));
+
+ return (0);
+}
+
+int
+nandfs_find_entry(struct nandfs_mdt* mdt, struct nandfs_node *nnode,
+ struct nandfs_alloc_request *req)
+{
+ uint64_t dblock, bblock, eblock;
+ uint32_t offset;
+ int error;
+
+ nandfs_mdt_trans_blk(mdt, req->entrynum, &dblock, &bblock, &eblock,
+ &offset);
+
+ error = nandfs_bread(nnode, dblock, NOCRED, 0, &req->bp_desc);
+ if (error) {
+ brelse(req->bp_desc);
+ return (error);
+ }
+
+ error = nandfs_bread(nnode, bblock, NOCRED, 0, &req->bp_bitmap);
+ if (error) {
+ brelse(req->bp_desc);
+ brelse(req->bp_bitmap);
+ return (error);
+ }
+
+ error = nandfs_bread(nnode, eblock, NOCRED, 0, &req->bp_entry);
+ if (error) {
+ brelse(req->bp_desc);
+ brelse(req->bp_bitmap);
+ brelse(req->bp_entry);
+ return (error);
+ }
+
+ DPRINTF(ALLOC,
+ ("%s: desc_buf: %p bitmap_buf %p entry_buf %p offset %x\n",
+ __func__, req->bp_desc, req->bp_bitmap, req->bp_entry, offset));
+
+ return (0);
+}
+
+static __inline void
+nandfs_calc_idx_entry(struct nandfs_mdt* mdt, uint32_t entrynum,
+ uint64_t *group, uint64_t *bitmap_idx, uint64_t *bitmap_off)
+{
+
+ /* Find group_desc index */
+ entrynum = entrynum %
+ (mdt->entries_per_group * mdt->groups_per_desc_block);
+ *group = entrynum / mdt->entries_per_group;
+ /* Find bitmap index and bit offset */
+ entrynum = entrynum % mdt->entries_per_group;
+ *bitmap_idx = entrynum / (sizeof(uint32_t) * __CHAR_BIT);
+ *bitmap_off = entrynum % (sizeof(uint32_t) * __CHAR_BIT);
+}
+
+int
+nandfs_free_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
+{
+ struct nandfs_block_group_desc *descriptors;
+ uint64_t bitmap_idx, bitmap_off;
+ uint64_t group;
+ uint32_t *mask, maskrw;
+
+ nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
+ &bitmap_off);
+
+ DPRINTF(ALLOC, ("nandfs_free_entry: req->entrynum=%jx bitmap_idx=%jx"
+ " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
+ (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
+
+ /* Update counter of free entries for group */
+ descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
+ descriptors[group].bg_nfrees++;
+
+ /* Clear bit to indicate that entry is free */
+ mask = (uint32_t *)req->bp_bitmap->b_data;
+ maskrw = mask[bitmap_idx];
+ KASSERT(maskrw & (1 << bitmap_off), ("freeing unallocated vblock"));
+ maskrw &= ~(1 << bitmap_off);
+ mask[bitmap_idx] = maskrw;
+
+ /* Make descriptor, bitmap and entry buffer dirty */
+ if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
+ nandfs_dirty_buf(req->bp_bitmap, 1);
+ nandfs_dirty_buf(req->bp_entry, 1);
+ } else {
+ brelse(req->bp_bitmap);
+ brelse(req->bp_entry);
+ return (-1);
+ }
+
+ return (0);
+}
+
+int
+nandfs_alloc_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
+{
+ struct nandfs_block_group_desc *descriptors;
+ uint64_t bitmap_idx, bitmap_off;
+ uint64_t group;
+ uint32_t *mask, maskrw;
+
+ nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
+ &bitmap_off);
+
+ DPRINTF(ALLOC, ("nandfs_alloc_entry: req->entrynum=%jx bitmap_idx=%jx"
+ " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
+ (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
+
+ /* Update counter of free entries for group */
+ descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
+ descriptors[group].bg_nfrees--;
+
+ /* Set bit to indicate that entry is taken */
+ mask = (uint32_t *)req->bp_bitmap->b_data;
+ maskrw = mask[bitmap_idx];
+ maskrw |= 1 << bitmap_off;
+ mask[bitmap_idx] = maskrw;
+
+ /* Make descriptor, bitmap and entry buffer dirty */
+ if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
+ nandfs_dirty_buf(req->bp_bitmap, 1);
+ nandfs_dirty_buf(req->bp_entry, 1);
+ } else {
+ brelse(req->bp_bitmap);
+ brelse(req->bp_entry);
+ return (-1);
+ }
+
+ return (0);
+}
+
+void
+nandfs_abort_entry(struct nandfs_alloc_request *req)
+{
+
+ brelse(req->bp_desc);
+ brelse(req->bp_bitmap);
+ brelse(req->bp_entry);
+}
+
+int
+nandfs_get_entry_block(struct nandfs_mdt *mdt, struct nandfs_node *node,
+ struct nandfs_alloc_request *req, uint32_t *entry, int create)
+{
+ struct buf *bp;
+ nandfs_lbn_t blocknr;
+ int error;
+
+ /* Find buffer number for given entry */
+ nandfs_mdt_trans(mdt, req->entrynum, &blocknr, entry);
+ DPRINTF(ALLOC, ("%s: ino %#jx entrynum:%#jx block:%#jx entry:%x\n",
+ __func__, (uintmax_t)node->nn_ino, (uintmax_t)req->entrynum,
+ (uintmax_t)blocknr, *entry));
+
+ /* Read entry block or create if 'create' parameter is not zero */
+ bp = NULL;
+
+ if (blocknr < node->nn_inode.i_blocks)
+ error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
+ else if (create)
+ error = nandfs_bcreate(node, blocknr, NOCRED, 0, &bp);
+ else
+ error = E2BIG;
+
+ if (error) {
+ DPRINTF(ALLOC, ("%s: ino %#jx block %#jx entry %x error %d\n",
+ __func__, (uintmax_t)node->nn_ino, (uintmax_t)blocknr,
+ *entry, error));
+ if (bp)
+ brelse(bp);
+ return (error);
+ }
+
+ MPASS(nandfs_vblk_get(bp) != 0 || node->nn_ino == NANDFS_DAT_INO);
+
+ req->bp_entry = bp;
+ return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_bmap.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_bmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_bmap.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/ktr.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+#include "bmap.h"
+
+nandfs_lbn_t
+nandfs_get_maxfilesize(struct nandfs_device *fsdev)
+{
+
+ return (get_maxfilesize(fsdev));
+}
+
+int
+nandfs_bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk,
+ nandfs_daddr_t *vblk)
+{
+ int error = 0;
+
+ if (node->nn_ino == NANDFS_GC_INO && lblk >= 0)
+ *vblk = lblk;
+ else
+ error = bmap_lookup(node, lblk, vblk);
+
+ DPRINTF(TRANSLATE, ("%s: error %d ino %#jx lblocknr %#jx -> %#jx\n",
+ __func__, error, (uintmax_t)node->nn_ino, (uintmax_t)lblk,
+ (uintmax_t)*vblk));
+
+ if (error)
+ nandfs_error("%s: returned %d", __func__, error);
+
+ return (error);
+}
+
+int
+nandfs_bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
+ struct buf *bp)
+{
+ struct nandfs_device *fsdev;
+ nandfs_daddr_t vblk;
+ int error;
+
+ fsdev = node->nn_nandfsdev;
+
+ vblk = 0;
+ if (node->nn_ino != NANDFS_DAT_INO) {
+ error = nandfs_vblock_alloc(fsdev, &vblk);
+ if (error)
+ return (error);
+ }
+
+ nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED);
+ nandfs_vblk_set(bp, vblk);
+
+ error = bmap_insert_block(node, lblk, vblk);
+ if (error) {
+ nandfs_vblock_free(fsdev, vblk);
+ return (error);
+ }
+
+ return (0);
+}
+
+int
+nandfs_bmap_dirty_blocks(struct nandfs_node *node, struct buf *bp, int force)
+{
+ int error;
+
+ error = bmap_dirty_meta(node, bp->b_lblkno, force);
+ if (error)
+ nandfs_error("%s: cannot dirty buffer %p\n",
+ __func__, bp);
+
+ return (error);
+}
+
+static int
+nandfs_bmap_update_mapping(struct nandfs_node *node, nandfs_lbn_t lblk,
+ nandfs_daddr_t blknr)
+{
+ int error;
+
+ DPRINTF(BMAP,
+ ("%s: node: %p ino: %#jx lblk: %#jx vblk: %#jx\n",
+ __func__, node, (uintmax_t)node->nn_ino, (uintmax_t)lblk,
+ (uintmax_t)blknr));
+
+ error = bmap_insert_block(node, lblk, blknr);
+
+ return (error);
+}
+
+int
+nandfs_bmap_update_block(struct nandfs_node *node, struct buf *bp,
+ nandfs_lbn_t blknr)
+{
+ nandfs_lbn_t lblk;
+ int error;
+
+ lblk = bp->b_lblkno;
+ nandfs_vblk_set(bp, blknr);
+
+ DPRINTF(BMAP, ("%s: node: %p ino: %#jx bp: %p lblk: %#jx blk: %#jx\n",
+ __func__, node, (uintmax_t)node->nn_ino, bp,
+ (uintmax_t)lblk, (uintmax_t)blknr));
+
+ error = nandfs_bmap_update_mapping(node, lblk, blknr);
+ if (error) {
+ nandfs_error("%s: cannot update lblk:%jx to blk:%jx for "
+ "node:%p, error:%d\n", __func__, (uintmax_t)lblk,
+ (uintmax_t)blknr, node, error);
+ return (error);
+ }
+
+ return (error);
+}
+
+int
+nandfs_bmap_update_dat(struct nandfs_node *node, nandfs_daddr_t oldblk,
+ struct buf *bp)
+{
+ struct nandfs_device *fsdev;
+ nandfs_daddr_t vblk = 0;
+ int error;
+
+ if (node->nn_ino == NANDFS_DAT_INO)
+ return (0);
+
+ if (nandfs_buf_check(bp, NANDFS_VBLK_ASSIGNED)) {
+ nandfs_buf_clear(bp, NANDFS_VBLK_ASSIGNED);
+ return (0);
+ }
+
+ fsdev = node->nn_nandfsdev;
+
+ /* First allocate a new virtual block... */
+ error = nandfs_vblock_alloc(fsdev, &vblk);
+ if (error)
+ return (error);
+
+ error = nandfs_bmap_update_block(node, bp, vblk);
+ if (error)
+ return (error);
+
+ /* ...then we can end the lifetime of the old one */
+ nandfs_vblock_end(fsdev, oldblk);
+
+ DPRINTF(BMAP,
+ ("%s: ino %#jx block %#jx: update vblk %#jx to %#jx\n",
+ __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno,
+ (uintmax_t)oldblk, (uintmax_t)vblk));
+ return (error);
+}
+
+int
+nandfs_bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t oblk,
+ nandfs_lbn_t nblk)
+{
+ nandfs_lbn_t todo;
+ int error;
+
+ todo = oblk - nblk;
+
+ DPRINTF(BMAP, ("%s: node %p oblk %jx nblk %jx truncate by %jx\n",
+ __func__, node, oblk, nblk, todo));
+
+ error = bmap_truncate_mapping(node, oblk, todo);
+ if (error)
+ return (error);
+
+ return (error);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_buffer.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_buffer.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_buffer.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/buf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/bio.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+struct buf *
+nandfs_geteblk(int size, int flags)
+{
+ struct buf *bp;
+
+ /*
+ * XXX
+ * Right now we can call geteblk with GB_NOWAIT_BD flag, which means
+ * it can return NULL. But we cannot afford to get NULL, hence this panic.
+ */
+ bp = geteblk(size, flags);
+ if (bp == NULL)
+ panic("geteblk returned NULL");
+
+ return (bp);
+}
+
+void
+nandfs_dirty_bufs_increment(struct nandfs_device *fsdev)
+{
+
+ mtx_lock(&fsdev->nd_mutex);
+ KASSERT(fsdev->nd_dirty_bufs >= 0, ("negative nd_dirty_bufs"));
+ fsdev->nd_dirty_bufs++;
+ mtx_unlock(&fsdev->nd_mutex);
+}
+
+void
+nandfs_dirty_bufs_decrement(struct nandfs_device *fsdev)
+{
+
+ mtx_lock(&fsdev->nd_mutex);
+ KASSERT(fsdev->nd_dirty_bufs > 0,
+ ("decrementing not-positive nd_dirty_bufs"));
+ fsdev->nd_dirty_bufs--;
+ mtx_unlock(&fsdev->nd_mutex);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_cleaner.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_cleaner.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,620 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_cleaner.c 236188 2012-05-28 16:33:58Z marcel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/buf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/bio.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+#define NANDFS_CLEANER_KILL 1
+
+static void nandfs_cleaner(struct nandfs_device *);
+static int nandfs_cleaner_clean_segments(struct nandfs_device *,
+ struct nandfs_vinfo *, uint32_t, struct nandfs_period *, uint32_t,
+ struct nandfs_bdesc *, uint32_t, uint64_t *, uint32_t);
+
+static int
+nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+ uint64_t nmembs);
+
+static void
+nandfs_wakeup_wait_cleaner(struct nandfs_device *fsdev, int reason)
+{
+
+ mtx_lock(&fsdev->nd_clean_mtx);
+ if (reason == NANDFS_CLEANER_KILL)
+ fsdev->nd_cleaner_exit = 1;
+ if (fsdev->nd_cleaning == 0) {
+ fsdev->nd_cleaning = 1;
+ wakeup(&fsdev->nd_cleaning);
+ }
+ cv_wait(&fsdev->nd_clean_cv, &fsdev->nd_clean_mtx);
+ mtx_unlock(&fsdev->nd_clean_mtx);
+}
+
+int
+nandfs_start_cleaner(struct nandfs_device *fsdev)
+{
+ int error;
+
+ MPASS(fsdev->nd_cleaner == NULL);
+
+ fsdev->nd_cleaner_exit = 0;
+
+ error = kthread_add((void(*)(void *))nandfs_cleaner, fsdev, NULL,
+ &fsdev->nd_cleaner, 0, 0, "nandfs_cleaner");
+ if (error)
+ printf("nandfs: could not start cleaner: %d\n", error);
+
+ return (error);
+}
+
+int
+nandfs_stop_cleaner(struct nandfs_device *fsdev)
+{
+
+ MPASS(fsdev->nd_cleaner != NULL);
+ nandfs_wakeup_wait_cleaner(fsdev, NANDFS_CLEANER_KILL);
+ fsdev->nd_cleaner = NULL;
+
+ DPRINTF(CLEAN, ("cleaner stopped\n"));
+ return (0);
+}
+
+static int
+nandfs_cleaner_finished(struct nandfs_device *fsdev)
+{
+ int exit;
+
+ mtx_lock(&fsdev->nd_clean_mtx);
+ fsdev->nd_cleaning = 0;
+ if (!fsdev->nd_cleaner_exit) {
+ DPRINTF(CLEAN, ("%s: sleep\n", __func__));
+ msleep(&fsdev->nd_cleaning, &fsdev->nd_clean_mtx, PRIBIO, "-",
+ hz * nandfs_cleaner_interval);
+ }
+ exit = fsdev->nd_cleaner_exit;
+ cv_broadcast(&fsdev->nd_clean_cv);
+ mtx_unlock(&fsdev->nd_clean_mtx);
+ if (exit) {
+ DPRINTF(CLEAN, ("%s: no longer active\n", __func__));
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+print_suinfo(struct nandfs_suinfo *suinfo, int nsegs)
+{
+ int i;
+
+ for (i = 0; i < nsegs; i++) {
+ DPRINTF(CLEAN, ("%jx %jd %c%c%c %10u\n",
+ suinfo[i].nsi_num, suinfo[i].nsi_lastmod,
+ (suinfo[i].nsi_flags &
+ (NANDFS_SEGMENT_USAGE_ACTIVE) ? 'a' : '-'),
+ (suinfo[i].nsi_flags &
+ (NANDFS_SEGMENT_USAGE_DIRTY) ? 'd' : '-'),
+ (suinfo[i].nsi_flags &
+ (NANDFS_SEGMENT_USAGE_ERROR) ? 'e' : '-'),
+ suinfo[i].nsi_blocks));
+ }
+}
+
+static int
+nandfs_cleaner_vblock_is_alive(struct nandfs_device *fsdev,
+ struct nandfs_vinfo *vinfo, struct nandfs_cpinfo *cp, uint32_t ncps)
+{
+ int64_t idx, min, max;
+
+ if (vinfo->nvi_end >= fsdev->nd_last_cno)
+ return (1);
+
+ if (ncps == 0)
+ return (0);
+
+ if (vinfo->nvi_end < cp[0].nci_cno ||
+ vinfo->nvi_start > cp[ncps - 1].nci_cno)
+ return (0);
+
+ idx = min = 0;
+ max = ncps - 1;
+ while (min <= max) {
+ idx = (min + max) / 2;
+ if (vinfo->nvi_start == cp[idx].nci_cno)
+ return (1);
+ if (vinfo->nvi_start < cp[idx].nci_cno)
+ max = idx - 1;
+ else
+ min = idx + 1;
+ }
+
+ return (vinfo->nvi_end >= cp[idx].nci_cno);
+}
+
+static void
+nandfs_cleaner_vinfo_mark_alive(struct nandfs_device *fsdev,
+ struct nandfs_vinfo *vinfo, uint32_t nmembs, struct nandfs_cpinfo *cp,
+ uint32_t ncps)
+{
+ uint32_t i;
+
+ for (i = 0; i < nmembs; i++)
+ vinfo[i].nvi_alive =
+ nandfs_cleaner_vblock_is_alive(fsdev, &vinfo[i], cp, ncps);
+}
+
+static int
+nandfs_cleaner_bdesc_is_alive(struct nandfs_device *fsdev,
+ struct nandfs_bdesc *bdesc)
+{
+ int alive;
+
+ alive = bdesc->bd_oblocknr == bdesc->bd_blocknr;
+ if (!alive)
+ MPASS(abs(bdesc->bd_oblocknr - bdesc->bd_blocknr) > 2);
+
+ return (alive);
+}
+
+static void
+nandfs_cleaner_bdesc_mark_alive(struct nandfs_device *fsdev,
+ struct nandfs_bdesc *bdesc, uint32_t nmembs)
+{
+ uint32_t i;
+
+ for (i = 0; i < nmembs; i++)
+ bdesc[i].bd_alive = nandfs_cleaner_bdesc_is_alive(fsdev,
+ &bdesc[i]);
+}
+
+static void
+nandfs_cleaner_iterate_psegment(struct nandfs_device *fsdev,
+ struct nandfs_segment_summary *segsum, union nandfs_binfo *binfo,
+ nandfs_daddr_t blk, struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp)
+{
+ int i;
+
+ DPRINTF(CLEAN, ("%s nbinfos %x\n", __func__, segsum->ss_nbinfos));
+ for (i = 0; i < segsum->ss_nbinfos; i++) {
+ if (binfo[i].bi_v.bi_ino == NANDFS_DAT_INO) {
+ (*bdpp)->bd_oblocknr = blk + segsum->ss_nblocks -
+ segsum->ss_nbinfos + i;
+ /*
+ * XXX Hack
+ */
+ if (segsum->ss_flags & NANDFS_SS_SR)
+ (*bdpp)->bd_oblocknr--;
+ (*bdpp)->bd_level = binfo[i].bi_dat.bi_level;
+ (*bdpp)->bd_offset = binfo[i].bi_dat.bi_blkoff;
+ (*bdpp)++;
+ } else {
+ (*vipp)->nvi_ino = binfo[i].bi_v.bi_ino;
+ (*vipp)->nvi_vblocknr = binfo[i].bi_v.bi_vblocknr;
+ (*vipp)++;
+ }
+ }
+}
+
+static int
+nandfs_cleaner_iterate_segment(struct nandfs_device *fsdev, uint64_t segno,
+ struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp, int *select)
+{
+ struct nandfs_segment_summary *segsum;
+ union nandfs_binfo *binfo;
+ struct buf *bp;
+ uint32_t nblocks;
+ nandfs_daddr_t curr, start, end;
+ int error = 0;
+
+ nandfs_get_segment_range(fsdev, segno, &start, &end);
+
+ DPRINTF(CLEAN, ("%s: segno %jx start %jx end %jx\n", __func__, segno,
+ start, end));
+
+ *select = 0;
+
+ for (curr = start; curr < end; curr += nblocks) {
+ error = nandfs_dev_bread(fsdev, curr, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ nandfs_error("%s: couldn't load segment summary of %jx: %d\n",
+ __func__, segno, error);
+ return (error);
+ }
+
+ segsum = (struct nandfs_segment_summary *)bp->b_data;
+ binfo = (union nandfs_binfo *)(bp->b_data + segsum->ss_bytes);
+
+ if (!nandfs_segsum_valid(segsum)) {
+ brelse(bp);
+ nandfs_error("nandfs: invalid summary of segment %jx\n", segno);
+ return (error);
+ }
+
+ DPRINTF(CLEAN, ("%s: %jx magic %x bytes %x nblocks %x nbinfos "
+ "%x\n", __func__, segno, segsum->ss_magic, segsum->ss_bytes,
+ segsum->ss_nblocks, segsum->ss_nbinfos));
+
+ nandfs_cleaner_iterate_psegment(fsdev, segsum, binfo, curr,
+ vipp, bdpp);
+ nblocks = segsum->ss_nblocks;
+ brelse(bp);
+ }
+
+ if (error == 0)
+ *select = 1;
+
+ return (error);
+}
+
+static int
+nandfs_cleaner_choose_segment(struct nandfs_device *fsdev, uint64_t **segpp,
+ uint64_t nsegs, uint64_t *rseg)
+{
+ struct nandfs_suinfo *suinfo;
+ uint64_t i, ssegs;
+ int error;
+
+ suinfo = malloc(sizeof(*suinfo) * nsegs, M_NANDFSTEMP,
+ M_ZERO | M_WAITOK);
+
+ if (*rseg >= fsdev->nd_fsdata.f_nsegments)
+ *rseg = 0;
+
+retry:
+ error = nandfs_get_segment_info_filter(fsdev, suinfo, nsegs, *rseg,
+ &ssegs, NANDFS_SEGMENT_USAGE_DIRTY,
+ NANDFS_SEGMENT_USAGE_ACTIVE | NANDFS_SEGMENT_USAGE_ERROR |
+ NANDFS_SEGMENT_USAGE_GC);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ goto out;
+ }
+ if (ssegs == 0 && *rseg != 0) {
+ *rseg = 0;
+ goto retry;
+ }
+ if (ssegs > 0) {
+ print_suinfo(suinfo, ssegs);
+
+ for (i = 0; i < ssegs; i++) {
+ (**segpp) = suinfo[i].nsi_num;
+ (*segpp)++;
+ }
+ *rseg = suinfo[i - 1].nsi_num + 1;
+ }
+
+out:
+ free(suinfo, M_NANDFSTEMP);
+ return (error);
+}
+
+static int
+nandfs_cleaner_body(struct nandfs_device *fsdev, uint64_t *rseg)
+{
+ struct nandfs_vinfo *vinfo, *vip, *vipi;
+ struct nandfs_bdesc *bdesc, *bdp, *bdpi;
+ struct nandfs_cpstat cpstat;
+ struct nandfs_cpinfo *cpinfo = NULL;
+ uint64_t *segnums, *segp;
+ int select, selected;
+ int error = 0;
+ int nsegs;
+ int i;
+
+ nsegs = nandfs_cleaner_segments;
+
+ vip = vinfo = malloc(sizeof(*vinfo) *
+ fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP,
+ M_ZERO | M_WAITOK);
+ bdp = bdesc = malloc(sizeof(*bdesc) *
+ fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP,
+ M_ZERO | M_WAITOK);
+ segp = segnums = malloc(sizeof(*segnums) * nsegs, M_NANDFSTEMP,
+ M_WAITOK);
+
+ error = nandfs_cleaner_choose_segment(fsdev, &segp, nsegs, rseg);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ goto out;
+ }
+
+ if (segnums == segp)
+ goto out;
+
+ selected = 0;
+ for (i = 0; i < segp - segnums; i++) {
+ error = nandfs_cleaner_iterate_segment(fsdev, segnums[i], &vip,
+ &bdp, &select);
+ if (error) {
+ /*
+ * XXX deselect (see below)?
+ */
+ goto out;
+ }
+ if (!select)
+ segnums[i] = NANDFS_NOSEGMENT;
+ else {
+ error = nandfs_markgc_segment(fsdev, segnums[i]);
+ if (error) {
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+ goto out;
+ }
+ selected++;
+ }
+ }
+
+ if (selected == 0) {
+ MPASS(vinfo == vip);
+ MPASS(bdesc == bdp);
+ goto out;
+ }
+
+ error = nandfs_get_cpstat(fsdev->nd_cp_node, &cpstat);
+ if (error) {
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+ goto out;
+ }
+
+ if (cpstat.ncp_nss != 0) {
+ cpinfo = malloc(sizeof(struct nandfs_cpinfo) * cpstat.ncp_nss,
+ M_NANDFSTEMP, M_WAITOK);
+ error = nandfs_get_cpinfo(fsdev->nd_cp_node, 1, NANDFS_SNAPSHOT,
+ cpinfo, cpstat.ncp_nss, NULL);
+ if (error) {
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+ goto out_locked;
+ }
+ }
+
+ NANDFS_WRITELOCK(fsdev);
+ DPRINTF(CLEAN, ("%s: got lock\n", __func__));
+
+ error = nandfs_get_dat_vinfo(fsdev, vinfo, vip - vinfo);
+ if (error) {
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+ goto out_locked;
+ }
+
+ nandfs_cleaner_vinfo_mark_alive(fsdev, vinfo, vip - vinfo, cpinfo,
+ cpstat.ncp_nss);
+
+ error = nandfs_get_dat_bdescs(fsdev, bdesc, bdp - bdesc);
+ if (error) {
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+ goto out_locked;
+ }
+
+ nandfs_cleaner_bdesc_mark_alive(fsdev, bdesc, bdp - bdesc);
+
+ DPRINTF(CLEAN, ("got:\n"));
+ for (vipi = vinfo; vipi < vip; vipi++) {
+ DPRINTF(CLEAN, ("v ino %jx vblocknr %jx start %jx end %jx "
+ "alive %d\n", vipi->nvi_ino, vipi->nvi_vblocknr,
+ vipi->nvi_start, vipi->nvi_end, vipi->nvi_alive));
+ }
+ for (bdpi = bdesc; bdpi < bdp; bdpi++) {
+ DPRINTF(CLEAN, ("b oblocknr %jx blocknr %jx offset %jx "
+ "alive %d\n", bdpi->bd_oblocknr, bdpi->bd_blocknr,
+ bdpi->bd_offset, bdpi->bd_alive));
+ }
+ DPRINTF(CLEAN, ("end list\n"));
+
+ error = nandfs_cleaner_clean_segments(fsdev, vinfo, vip - vinfo, NULL,
+ 0, bdesc, bdp - bdesc, segnums, segp - segnums);
+ if (error)
+ nandfs_error("%s:%d\n", __FILE__, __LINE__);
+
+out_locked:
+ NANDFS_WRITEUNLOCK(fsdev);
+out:
+ free(cpinfo, M_NANDFSTEMP);
+ free(segnums, M_NANDFSTEMP);
+ free(bdesc, M_NANDFSTEMP);
+ free(vinfo, M_NANDFSTEMP);
+
+ return (error);
+}
+
+static void
+nandfs_cleaner(struct nandfs_device *fsdev)
+{
+ uint64_t checked_seg = 0;
+ int error;
+
+ while (!nandfs_cleaner_finished(fsdev)) {
+ if (!nandfs_cleaner_enable || rebooting)
+ continue;
+
+ DPRINTF(CLEAN, ("%s: run started\n", __func__));
+
+ fsdev->nd_cleaning = 1;
+
+ error = nandfs_cleaner_body(fsdev, &checked_seg);
+
+ DPRINTF(CLEAN, ("%s: run finished error %d\n", __func__,
+ error));
+ }
+
+ DPRINTF(CLEAN, ("%s: exiting\n", __func__));
+ kthread_exit();
+}
+
+static int
+nandfs_cleaner_clean_segments(struct nandfs_device *nffsdev,
+ struct nandfs_vinfo *vinfo, uint32_t nvinfo,
+ struct nandfs_period *pd, uint32_t npd,
+ struct nandfs_bdesc *bdesc, uint32_t nbdesc,
+ uint64_t *segments, uint32_t nsegs)
+{
+ struct nandfs_node *gc;
+ struct buf *bp;
+ uint32_t i;
+ int error = 0;
+
+ gc = nffsdev->nd_gc_node;
+
+ DPRINTF(CLEAN, ("%s: enter\n", __func__));
+
+ VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
+ for (i = 0; i < nvinfo; i++) {
+ if (!vinfo[i].nvi_alive)
+ continue;
+ DPRINTF(CLEAN, ("%s: read vblknr:%#jx blk:%#jx\n",
+ __func__, (uintmax_t)vinfo[i].nvi_vblocknr,
+ (uintmax_t)vinfo[i].nvi_blocknr));
+ error = nandfs_bread(nffsdev->nd_gc_node, vinfo[i].nvi_blocknr,
+ NULL, 0, &bp);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ VOP_UNLOCK(NTOV(gc), 0);
+ goto out;
+ }
+ nandfs_vblk_set(bp, vinfo[i].nvi_vblocknr);
+ nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED);
+ nandfs_dirty_buf(bp, 1);
+ }
+ VOP_UNLOCK(NTOV(gc), 0);
+
+ /* Delete checkpoints */
+ for (i = 0; i < npd; i++) {
+ DPRINTF(CLEAN, ("delete checkpoint: %jx\n",
+ (uintmax_t)pd[i].p_start));
+ error = nandfs_delete_cp(nffsdev->nd_cp_node, pd[i].p_start,
+ pd[i].p_end);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ goto out;
+ }
+ }
+
+ /* Update vblocks */
+ for (i = 0; i < nvinfo; i++) {
+ if (vinfo[i].nvi_alive)
+ continue;
+ DPRINTF(CLEAN, ("freeing vblknr: %jx\n", vinfo[i].nvi_vblocknr));
+ error = nandfs_vblock_free(nffsdev, vinfo[i].nvi_vblocknr);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ goto out;
+ }
+ }
+
+ error = nandfs_process_bdesc(nffsdev, bdesc, nbdesc);
+ if (error) {
+ nandfs_error("%s:%d", __FILE__, __LINE__);
+ goto out;
+ }
+
+ /* Add segments to clean */
+ if (nffsdev->nd_free_count) {
+ nffsdev->nd_free_base = realloc(nffsdev->nd_free_base,
+ (nffsdev->nd_free_count + nsegs) * sizeof(uint64_t),
+ M_NANDFSTEMP, M_WAITOK | M_ZERO);
+ memcpy(&nffsdev->nd_free_base[nffsdev->nd_free_count], segments,
+ nsegs * sizeof(uint64_t));
+ nffsdev->nd_free_count += nsegs;
+ } else {
+ nffsdev->nd_free_base = malloc(nsegs * sizeof(uint64_t),
+ M_NANDFSTEMP, M_WAITOK|M_ZERO);
+ memcpy(nffsdev->nd_free_base, segments,
+ nsegs * sizeof(uint64_t));
+ nffsdev->nd_free_count = nsegs;
+ }
+
+out:
+
+ DPRINTF(CLEAN, ("%s: exit error %d\n", __func__, error));
+
+ return (error);
+}
+
+static int
+nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+ uint64_t nmembs)
+{
+ struct nandfs_node *dat_node;
+ struct buf *bp;
+ uint64_t i;
+ int error;
+
+ dat_node = nffsdev->nd_dat_node;
+
+ VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);
+
+ for (i = 0; i < nmembs; i++) {
+ if (!bd[i].bd_alive)
+ continue;
+ DPRINTF(CLEAN, ("%s: idx %jx offset %jx\n",
+ __func__, i, bd[i].bd_offset));
+ if (bd[i].bd_level) {
+ error = nandfs_bread_meta(dat_node, bd[i].bd_offset,
+ NULL, 0, &bp);
+ if (error) {
+ nandfs_error("%s: cannot read dat node "
+ "level:%d\n", __func__, bd[i].bd_level);
+ brelse(bp);
+ VOP_UNLOCK(NTOV(dat_node), 0);
+ return (error);
+ }
+ nandfs_dirty_buf_meta(bp, 1);
+ nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
+ } else {
+ error = nandfs_bread(dat_node, bd[i].bd_offset, NULL,
+ 0, &bp);
+ if (error) {
+ nandfs_error("%s: cannot read dat node\n",
+ __func__);
+ brelse(bp);
+ VOP_UNLOCK(NTOV(dat_node), 0);
+ return (error);
+ }
+ nandfs_dirty_buf(bp, 1);
+ }
+ DPRINTF(CLEAN, ("%s: bp: %p\n", __func__, bp));
+ }
+
+ VOP_UNLOCK(NTOV(dat_node), 0);
+
+ return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_cpfile.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_cpfile.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,776 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_cpfile.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+
+/* Return the on-media size, in bytes, of one checkpoint entry. */
+static int
+nandfs_checkpoint_size(struct nandfs_device *fsdev)
+{
+	int cp_size;
+
+	cp_size = fsdev->nd_fsdata.f_checkpoint_size;
+
+	return (cp_size);
+}
+
+/*
+ * Translate checkpoint number 'cn' into the cpfile logical block that
+ * holds its entry (*blk) and the byte offset of the entry inside that
+ * block (*offset).  The first slots of the cpfile are consumed by the
+ * cpfile header, so entries for cn = 1.. start after it.
+ * Always returns 0.
+ */
+static int
+nandfs_checkpoint_blk_offset(struct nandfs_device *fsdev, uint64_t cn,
+    uint64_t *blk, uint64_t *offset)
+{
+	uint64_t off;
+	uint16_t cp_size, cp_per_blk;
+
+	KASSERT((cn), ("checkpoint cannot be zero"));
+
+	cp_size = fsdev->nd_fsdata.f_checkpoint_size;
+	cp_per_blk = fsdev->nd_blocksize / cp_size;
+	/* Number of entry slots occupied by the cpfile header. */
+	off = roundup(sizeof(struct nandfs_cpfile_header), cp_size) / cp_size;
+	off += (cn - 1);
+
+	*blk = off / cp_per_blk;
+	*offset = (off % cp_per_blk) * cp_size;
+
+	return (0);
+}
+
+/*
+ * Number of checkpoint entries that fit between 'offset' and the end of
+ * a cpfile block.  'cn' and 'blk' are unused; they are kept for symmetry
+ * with nandfs_checkpoint_blk_offset().
+ */
+static int
+nandfs_checkpoint_blk_remaining(struct nandfs_device *fsdev, uint64_t cn,
+    uint64_t blk, uint64_t offset)
+{
+	uint16_t entry_size, nentries;
+
+	entry_size = fsdev->nd_fsdata.f_checkpoint_size;
+	nentries = (fsdev->nd_blocksize - offset) / entry_size;
+
+	return (nentries);
+}
+
+/*
+ * Prepare the cpfile blocks that back checkpoint 'cn' for an update:
+ * dirty the header block and read-or-create (and dirty) the entry block.
+ * Only the current checkpoint or its immediate successor may be touched.
+ * Returns 0 on success, -1 on any failure (not an errno value —
+ * NOTE(review): callers appear to treat any nonzero result as failure;
+ * confirm before relying on the exact value).
+ */
+int
+nandfs_get_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node,
+    uint64_t cn)
+{
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	/* Only the last checkpoint or the next one can be obtained. */
+	if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) {
+		return (-1);
+	}
+
+	/* Dirty the cpfile header block (block 0). */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (-1);
+	}
+
+	/* nandfs_dirty_buf() releases bp itself on failure — presumably;
+	 * verify against nandfs_subr.c before changing this path. */
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (-1);
+
+
+	nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset);
+
+	if (blk != 0) {
+		/* Entry lives outside the header block: read it if it
+		 * already exists, otherwise allocate a fresh block. */
+		if (blk < cp_node->nn_inode.i_blocks)
+			error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		else
+			error = nandfs_bcreate(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			if (bp)
+				brelse(bp);
+			return (-1);
+		}
+
+		nandfs_dirty_buf(bp, 1);
+	}
+
+	DPRINTF(CPFILE, ("%s: cn:%#jx entry block:%#jx offset:%#jx\n",
+	    __func__, (uintmax_t)cn, (uintmax_t)blk, (uintmax_t)offset));
+
+	return (0);
+}
+
+/*
+ * Write out the data of checkpoint 'cn': bump the checkpoint count in
+ * the cpfile header and fill in the checkpoint entry (creation time,
+ * block count, a copy of the ifile inode).  Only the current checkpoint
+ * or its successor may be written.  Returns 0, -1 for an invalid 'cn',
+ * or a read error.
+ */
+int
+nandfs_set_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node,
+    uint64_t cn, struct nandfs_inode *ifile_inode, uint64_t nblocks)
+{
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) {
+		nandfs_error("%s: trying to set invalid chekpoint %jx - %jx\n",
+		    __func__, cn, fsdev->nd_last_cno);
+		return (-1);
+	}
+
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_ncheckpoints++;
+
+	nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset);
+
+	if (blk != 0) {
+		/* Entry is not in the header block; swap buffers. */
+		brelse(bp);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+	}
+
+	cnp = (struct nandfs_checkpoint *)((uint8_t *)bp->b_data + offset);
+	cnp->cp_flags = 0;
+	cnp->cp_checkpoints_count = 1;
+	memset(&cnp->cp_snapshot_list, 0, sizeof(struct nandfs_snapshot_list));
+	cnp->cp_cno = cn;
+	cnp->cp_create = fsdev->nd_ts.tv_sec;
+	cnp->cp_nblk_inc = nblocks;
+	cnp->cp_blocks_count = 0;
+	memcpy(&cnp->cp_ifile_inode, ifile_inode, sizeof(cnp->cp_ifile_inode));
+
+	DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx nblk:%#jx\n",
+	    __func__, (uintmax_t)cn, (uintmax_t)cnp->cp_create,
+	    (uintmax_t)nblocks));
+
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Report whether checkpoint 'cno' is currently mounted by any mount
+ * instance attached to this device.  Returns 1 if mounted, 0 otherwise.
+ */
+static int
+nandfs_cp_mounted(struct nandfs_device *nandfsdev, uint64_t cno)
+{
+	struct nandfsmount *mp;
+	int found;
+
+	found = 0;
+	mtx_lock(&nandfsdev->nd_mutex);
+	/* No double-mounting of the same checkpoint */
+	STAILQ_FOREACH(mp, &nandfsdev->nd_mounts, nm_next_mount) {
+		if (mp->nm_mount_args.cpno == cno) {
+			found = 1;
+			break;
+		}
+	}
+	mtx_unlock(&nandfsdev->nd_mutex);
+
+	return (found);
+}
+
+/*
+ * Promote checkpoint 'cno' to a snapshot.  Snapshots form an on-media
+ * doubly-linked list anchored in the cpfile header and ordered by
+ * checkpoint number; this walks back from the newest snapshot to find
+ * the insertion point, links 'cno' between its neighbours, sets the
+ * SNAPSHOT flag and bumps the header's snapshot count.
+ * Returns 0, ENOENT for an invalid checkpoint, EINVAL if it already is
+ * a snapshot, or an I/O error.
+ */
+static int
+nandfs_cp_set_snapshot(struct nandfs_node *cp_node, uint64_t cno)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_snapshot_list *list;
+	struct buf *bp;
+	uint64_t blk, prev_blk, offset;
+	uint64_t curr, prev;
+	int error;
+
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* Get snapshot data and validate the checkpoint's state. */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		brelse(bp);
+		return (ENOENT);
+	}
+	if ((cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	brelse(bp);
+	/* Get list from header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	list = &cnh->ch_snapshot_list;
+	prev = list->ssl_prev;
+	brelse(bp);
+	prev_blk = ~(0);
+	curr = 0;
+	/* Walk backwards until the snapshot just below 'cno' is found. */
+	while (prev > cno) {
+		curr = prev;
+		nandfs_checkpoint_blk_offset(fsdev, prev, &prev_blk, &offset);
+		error = nandfs_bread(cp_node, prev_blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+		prev = list->ssl_prev;
+		brelse(bp);
+	}
+
+	/* Link 'cno' in front of 'curr' (or at the header if none). */
+	if (curr == 0) {
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	} else {
+		nandfs_checkpoint_blk_offset(fsdev, curr, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	}
+
+	list->ssl_prev = cno;
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (error);
+
+
+	/* Update snapshot for cno */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	list = &cnp->cp_snapshot_list;
+	list->ssl_prev = prev;
+	list->ssl_next = curr;
+	cnp->cp_flags |= NANDFS_CHECKPOINT_SNAPSHOT;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Link 'cno' behind 'prev' (or at the header if none). */
+	if (prev == 0) {
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	} else {
+		/* Update snapshot list for prev */
+		nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	}
+	list->ssl_next = cno;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_nsnapshots++;
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Demote snapshot 'cno' back to a plain checkpoint: unlink it from the
+ * on-media snapshot list, clear its SNAPSHOT flag and decrement the
+ * snapshot count in the cpfile header.  Returns 0, ENOENT for an
+ * invalid checkpoint, EINVAL if it is not a snapshot, or an I/O error.
+ */
+static int
+nandfs_cp_clr_snapshot(struct nandfs_node *cp_node, uint64_t cno)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_snapshot_list *list;
+	struct buf *bp;
+	uint64_t blk, offset;
+	uint64_t next, prev;
+	int error;
+
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* Get snapshot data and validate the checkpoint's state. */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		brelse(bp);
+		return (ENOENT);
+	}
+	if (!(cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	list = &cnp->cp_snapshot_list;
+	next = list->ssl_next;
+	prev = list->ssl_prev;
+	brelse(bp);
+
+	/* Get previous snapshot (or the header anchor) and relink it. */
+	if (prev != 0) {
+		nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	} else {
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	}
+
+	list->ssl_next = next;
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (error);
+
+	/* Get next snapshot (or the header anchor) and relink it. */
+	if (next != 0) {
+		nandfs_checkpoint_blk_offset(fsdev, next, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	} else {
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	}
+	list->ssl_prev = prev;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update snapshot list for cno */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	list = &cnp->cp_snapshot_list;
+	list->ssl_prev = 0;
+	list->ssl_next = 0;
+	/* Clear only the SNAPSHOT bit (was '&= !', which wiped all flags). */
+	cnp->cp_flags &= ~NANDFS_CHECKPOINT_SNAPSHOT;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_nsnapshots--;
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Change the mode of checkpoint ncpm->ncpm_cno: promote it to a
+ * snapshot (NANDFS_SNAPSHOT) or demote it back to a plain checkpoint
+ * (NANDFS_CHECKPOINT).  A mounted snapshot cannot be demoted (EBUSY).
+ */
+int
+nandfs_chng_cpmode(struct nandfs_node *node, struct nandfs_cpmode *ncpm)
+{
+	struct nandfs_device *fsdev;
+	uint64_t cno;
+	int error;
+
+	fsdev = node->nn_nandfsdev;
+	cno = ncpm->ncpm_cno;
+
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	switch (ncpm->ncpm_mode) {
+	case NANDFS_CHECKPOINT:
+		if (nandfs_cp_mounted(fsdev, cno))
+			error = EBUSY;
+		else
+			error = nandfs_cp_clr_snapshot(node, cno);
+		break;
+	case NANDFS_SNAPSHOT:
+		error = nandfs_cp_set_snapshot(node, cno);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (error);
+}
+
+/*
+ * Copy the user-visible fields of an on-media checkpoint entry into a
+ * nandfs_cpinfo record.
+ */
+static void
+nandfs_cpinfo_fill(struct nandfs_checkpoint *cnp, struct nandfs_cpinfo *nci)
+{
+
+	nci->nci_cno = cnp->cp_cno;
+	nci->nci_flags = cnp->cp_flags;
+	nci->nci_pad = 0;
+	nci->nci_create = cnp->cp_create;
+	nci->nci_nblk_inc = cnp->cp_nblk_inc;
+	nci->nci_blocks_count = cnp->cp_blocks_count;
+	nci->nci_next = cnp->cp_snapshot_list.ssl_next;
+	DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx\n",
+	    __func__, (uintmax_t)cnp->cp_cno,
+	    (uintmax_t)cnp->cp_create));
+}
+
+/*
+ * Copy info for up to 'mnmembs' consecutive checkpoints, starting at
+ * 'cno', into the 'nci' array.  The cpfile is read block by block;
+ * within each block entries are walked sequentially until the last
+ * checkpoint on the filesystem, the end of the block, or the requested
+ * count is reached.  *nmembs receives the number of records produced.
+ * Returns 0, ENOENT for cno == 0, EINVAL for mnmembs < 1, or an I/O
+ * error.
+ */
+static int
+nandfs_get_cpinfo_cp(struct nandfs_node *node, uint64_t cno,
+    struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs)
+{
+	struct nandfs_device *fsdev;
+	struct buf *bp;
+	uint64_t blk, offset, last_cno, i;
+	uint16_t remaining;
+	int error;
+#ifdef INVARIANTS
+	uint64_t testblk, testoffset;
+#endif
+
+	/* Checkpoint number 0 does not exist. */
+	if (cno == 0) {
+		return (ENOENT);
+	}
+
+	if (mnmembs < 1) {
+		return (EINVAL);
+	}
+
+	fsdev = node->nn_nandfsdev;
+	last_cno = fsdev->nd_last_cno;
+	DPRINTF(CPFILE, ("%s: cno:%#jx mnmembs: %#jx last:%#jx\n", __func__,
+	    (uintmax_t)cno, (uintmax_t)mnmembs,
+	    (uintmax_t)fsdev->nd_last_cno));
+
+	/*
+	 * do {
+	 *	get block
+	 *	read checkpoints until we hit last checkpoint, end of block or
+	 *	requested number
+	 * } while (last read checkpoint <= last checkpoint on fs &&
+	 *	read checkpoints < request number);
+	 */
+	*nmembs = i = 0;
+	do {
+		nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+		remaining = nandfs_checkpoint_blk_remaining(fsdev, cno,
+		    blk, offset);
+		error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+
+		while (cno <= last_cno && i < mnmembs && remaining) {
+#ifdef INVARIANTS
+			/* The incremental offset must agree with a fresh
+			 * block/offset computation for this cno. */
+			nandfs_checkpoint_blk_offset(fsdev, cno, &testblk,
+			    &testoffset);
+			KASSERT(testblk == blk, ("testblk != blk"));
+			KASSERT(testoffset == offset, ("testoffset != offset"));
+#endif
+			DPRINTF(CPFILE, ("%s: cno %#jx\n", __func__,
+			    (uintmax_t)cno));
+
+			nandfs_cpinfo_fill((struct nandfs_checkpoint *)
+			    (bp->b_data + offset), nci);
+			offset += nandfs_checkpoint_size(fsdev);
+			i++;
+			nci++;
+			cno++;
+			(*nmembs)++;
+			remaining--;
+		}
+		brelse(bp);
+	} while (cno <= last_cno && i < mnmembs);
+
+	return (0);
+}
+
+/*
+ * Copy info for up to 'mnmembs' snapshots into 'nci', following the
+ * on-media snapshot list starting at snapshot 'cno' (cno == 1 means
+ * "start from the first snapshot", found via the cpfile header).
+ * *nmembs (if non-NULL) receives the number of records produced.
+ * Returns 0, or ENOENT on a bad starting number / failed read.
+ */
+static int
+nandfs_get_cpinfo_sp(struct nandfs_node *node, uint64_t cno,
+    struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs)
+{
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_device *fsdev;
+	struct buf *bp = NULL;
+	uint64_t curr = 0;
+	uint64_t blk, offset, curr_cno;
+	uint32_t flag;
+	uint32_t i;
+	int error;
+
+	if (cno == 0 || cno == ~(0))
+		return (ENOENT);
+
+	fsdev = node->nn_nandfsdev;
+	curr_cno = cno;
+
+	if (nmembs)
+		*nmembs = 0;
+	if (curr_cno == 1) {
+		/* Get list from header */
+		error = nandfs_bread(node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		curr_cno = cnh->ch_snapshot_list.ssl_next;
+		brelse(bp);
+		bp = NULL;
+
+		/* No snapshots */
+		if (curr_cno == 0)
+			return (0);
+	}
+
+	for (i = 0; i < mnmembs; i++, nci++) {
+		nandfs_checkpoint_blk_offset(fsdev, curr_cno, &blk, &offset);
+		/* Only re-read when the entry lives in a different block. */
+		if (i == 0 || curr != blk) {
+			if (bp)
+				brelse(bp);
+			error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+			if (error) {
+				brelse(bp);
+				return (ENOENT);
+			}
+			curr = blk;
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		flag = cnp->cp_flags;
+		if (!(flag & NANDFS_CHECKPOINT_SNAPSHOT) ||
+		    (flag & NANDFS_CHECKPOINT_INVALID))
+			break;
+
+		/* Share the record-filling logic with the checkpoint path. */
+		nandfs_cpinfo_fill(cnp, nci);
+		if (nmembs)
+			(*nmembs)++;
+
+		curr_cno = nci->nci_next;
+		if (!curr_cno)
+			break;
+	}
+
+	/* bp is NULL when mnmembs == 0 or the header held no snapshots. */
+	if (bp)
+		brelse(bp);
+
+	return (0);
+}
+
+/*
+ * Dispatch a checkpoint-info query to the checkpoint or snapshot
+ * gatherer depending on 'flags', holding the cpfile vnode lock.
+ */
+int
+nandfs_get_cpinfo(struct nandfs_node *node, uint64_t cno, uint16_t flags,
+    struct nandfs_cpinfo *nci, uint32_t nmembs, uint32_t *nnmembs)
+{
+	int error;
+
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	if (flags == NANDFS_CHECKPOINT)
+		error = nandfs_get_cpinfo_cp(node, cno, nci, nmembs, nnmembs);
+	else if (flags == NANDFS_SNAPSHOT)
+		error = nandfs_get_cpinfo_sp(node, cno, nci, nmembs, nnmembs);
+	else
+		error = EINVAL;
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (error);
+}
+
+/*
+ * Ioctl backend: gather checkpoint/snapshot info into a temporary
+ * kernel buffer and copy it out to the user-supplied array.
+ */
+int
+nandfs_get_cpinfo_ioctl(struct nandfs_node *node, struct nandfs_argv *nargv)
+{
+	struct nandfs_cpinfo *records;
+	void *ubuf;
+	uint32_t filled;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_CPINFO_MAX)
+		return (EINVAL);
+
+	ubuf = (void *)((uintptr_t)nargv->nv_base);
+	filled = 0;
+	records = malloc(sizeof(struct nandfs_cpinfo) * nargv->nv_nmembs,
+	    M_NANDFSTEMP, M_WAITOK | M_ZERO);
+
+	error = nandfs_get_cpinfo(node, nargv->nv_index, nargv->nv_flags,
+	    records, nargv->nv_nmembs, &filled);
+	if (error == 0) {
+		nargv->nv_nmembs = filled;
+		error = copyout(records, ubuf,
+		    sizeof(struct nandfs_cpinfo) * filled);
+	}
+
+	free(records, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Invalidate all checkpoints in the range [start, end].  Stops (and
+ * returns 0) at the first snapshot encountered: snapshots must be
+ * demoted with nandfs_chng_cpmode() before they can be deleted.
+ * Returns 0 or an I/O error.
+ */
+int
+nandfs_delete_cp(struct nandfs_node *node, uint64_t start, uint64_t end)
+{
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_device *fsdev;
+	struct buf *bp;
+	uint64_t cno = start, blk, offset;
+	int error;
+
+	DPRINTF(CPFILE, ("%s: delete cno %jx-%jx\n", __func__, start, end));
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	fsdev = node->nn_nandfsdev;
+	for (cno = start; cno <= end; cno++) {
+		/* Checkpoint number 0 does not exist. */
+		if (!cno)
+			continue;
+
+		nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+		error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			VOP_UNLOCK(NTOV(node), 0);
+			return (error);
+		}
+
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		if (cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) {
+			brelse(bp);
+			VOP_UNLOCK(NTOV(node), 0);
+			return (0);
+		}
+
+		cnp->cp_flags |= NANDFS_CHECKPOINT_INVALID;
+
+		error = nandfs_dirty_buf(bp, 0);
+		if (error) {
+			/* Was returning with the vnode still locked. */
+			VOP_UNLOCK(NTOV(node), 0);
+			return (error);
+		}
+	}
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (0);
+}
+
+/*
+ * Promote the most recent checkpoint to a snapshot; its number is
+ * returned through 'cno'.
+ */
+int
+nandfs_make_snap(struct nandfs_device *fsdev, uint64_t *cno)
+{
+	struct nandfs_cpmode cpm;
+
+	cpm.ncpm_cno = fsdev->nd_last_cno;
+	cpm.ncpm_mode = NANDFS_SNAPSHOT;
+	*cno = cpm.ncpm_cno;
+
+	return (nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm));
+}
+
+/* Demote snapshot 'cno' back to a plain (deletable) checkpoint. */
+int
+nandfs_delete_snap(struct nandfs_device *fsdev, uint64_t cno)
+{
+	struct nandfs_cpmode cpm;
+
+	cpm.ncpm_cno = cno;
+	cpm.ncpm_mode = NANDFS_CHECKPOINT;
+
+	return (nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm));
+}
+
+/*
+ * Report checkpoint statistics: the latest checkpoint number plus the
+ * checkpoint and snapshot counts kept in the cpfile header.
+ */
+int
+nandfs_get_cpstat(struct nandfs_node *cp_node, struct nandfs_cpstat *ncp)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *hdr;
+	struct buf *bp;
+	int error;
+
+	VOP_LOCK(NTOV(cp_node), LK_EXCLUSIVE);
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* The header lives in the cpfile's first block. */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		return (error);
+	}
+	hdr = (struct nandfs_cpfile_header *) bp->b_data;
+	ncp->ncp_cno = fsdev->nd_last_cno;
+	ncp->ncp_ncps = hdr->ch_ncheckpoints;
+	ncp->ncp_nss = hdr->ch_nsnapshots;
+	DPRINTF(CPFILE, ("%s: cno:%#jx ncps:%#jx nss:%#jx\n",
+	    __func__, ncp->ncp_cno, ncp->ncp_ncps, ncp->ncp_nss));
+	brelse(bp);
+	VOP_UNLOCK(NTOV(cp_node), 0);
+
+	return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_dat.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_dat.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,344 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_dat.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Allocate a new virtual block number in the DAT.  The entry's lifetime
+ * starts at the upcoming checkpoint (nd_last_cno + 1) and is left
+ * open-ended (de_end = max) until nandfs_vblock_end() is called.
+ * Takes the DAT vnode lock unless the caller already holds it.
+ * Returns 0 and the new number via *vblock, or an allocation error.
+ */
+int
+nandfs_vblock_alloc(struct nandfs_device *nandfsdev, nandfs_daddr_t *vblock)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *dat_entry;
+	uint64_t start;
+	uint32_t entry;
+	int locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+	start = nandfsdev->nd_last_cno + 1;
+
+	locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	/* entrynum 0 asks the allocator for any free entry. */
+	req.entrynum = 0;
+
+	/* Alloc vblock number */
+	error = nandfs_find_free_entry(mdt, dat, &req);
+	if (error) {
+		nandfs_error("%s: cannot find free vblk entry\n",
+		    __func__);
+		if (!locked)
+			VOP_UNLOCK(NTOV(dat), 0);
+		return (error);
+	}
+
+	/* Read/create buffer */
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 1);
+	if (error) {
+		nandfs_error("%s: cannot get free vblk entry\n",
+		    __func__);
+		nandfs_abort_entry(&req);
+		if (!locked)
+			VOP_UNLOCK(NTOV(dat), 0);
+		return (error);
+	}
+
+	/* Fill out vblock data */
+	dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data;
+	dat_entry[entry].de_start = start;
+	dat_entry[entry].de_end = UINTMAX_MAX;
+	dat_entry[entry].de_blocknr = 0;
+
+	/* Commit allocation */
+	/* NOTE(review): unlike the path above, this error path does not
+	 * call nandfs_abort_entry() — confirm nandfs_alloc_entry() cleans
+	 * up the request on failure. */
+	error = nandfs_alloc_entry(mdt, &req);
+	if (error) {
+		nandfs_error("%s: cannot get free vblk entry\n",
+		    __func__);
+		if (!locked)
+			VOP_UNLOCK(NTOV(dat), 0);
+		return (error);
+	}
+
+	/* Return allocated vblock */
+	*vblock = req.entrynum;
+	DPRINTF(DAT, ("%s: allocated vblock %#jx\n",
+	    __func__, (uintmax_t)*vblock));
+
+	if (!locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+	return (error);
+}
+
+/*
+ * Record the physical block number backing virtual block 'vblock' in
+ * its DAT entry.  Takes the DAT vnode lock unless the caller already
+ * holds it.  Returns 0 or an error from the entry lookup / dirtying.
+ */
+int
+nandfs_vblock_assign(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock,
+    nandfs_lbn_t block)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *dat_entry;
+	uint32_t entry;
+	int locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+
+	locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	req.entrynum = vblock;
+
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0);
+	if (!error) {
+		dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data;
+		dat_entry[entry].de_blocknr = block;
+
+		DPRINTF(DAT, ("%s: assign vblock %jx->%jx\n",
+		    __func__, (uintmax_t)vblock, (uintmax_t)block));
+
+		/*
+		 * It is mostly called from syncer() so
+		 * we want to force making buf dirty
+		 */
+		error = nandfs_dirty_buf(req.bp_entry, 1);
+	}
+
+	if (!locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+
+	return (error);
+}
+
+/*
+ * Close the lifetime of virtual block 'vblock': stamp its DAT entry's
+ * de_end with the current (last) checkpoint number.  Takes the DAT
+ * vnode lock unless the caller already holds it.
+ */
+int
+nandfs_vblock_end(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *dat_entry;
+	uint64_t last_cno;
+	uint32_t entry;
+	int locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+	last_cno = nandfsdev->nd_last_cno;
+
+	locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	req.entrynum = vblock;
+
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0);
+	if (error == 0) {
+		dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data;
+		dat_entry[entry].de_end = last_cno;
+		DPRINTF(DAT, ("%s: end vblock %#jx at checkpoint %#jx\n",
+		    __func__, (uintmax_t)vblock, (uintmax_t)last_cno));
+
+		/*
+		 * It is mostly called from syncer() so
+		 * we want to force making buf dirty
+		 */
+		error = nandfs_dirty_buf(req.bp_entry, 1);
+	}
+
+	if (!locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+
+	return (error);
+}
+
+/* Release the DAT entry for virtual block 'vblock'. */
+int
+nandfs_vblock_free(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
+{
+	struct nandfs_node *dat_node;
+	struct nandfs_mdt *dat_mdt;
+	struct nandfs_alloc_request req;
+	int error;
+
+	dat_node = nandfsdev->nd_dat_node;
+	dat_mdt = &nandfsdev->nd_dat_mdt;
+
+	VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);
+	req.entrynum = vblock;
+
+	error = nandfs_find_entry(dat_mdt, dat_node, &req);
+	if (error == 0) {
+		DPRINTF(DAT, ("%s: vblk %#jx\n", __func__, (uintmax_t)vblock));
+		nandfs_free_entry(dat_mdt, &req);
+	}
+
+	VOP_UNLOCK(NTOV(dat_node), 0);
+	return (error);
+}
+
+/*
+ * Ioctl backend: copy in an array of vinfo requests, resolve them
+ * against the DAT, and copy the results back out.
+ */
+int
+nandfs_get_dat_vinfo_ioctl(struct nandfs_device *nandfsdev, struct nandfs_argv *nargv)
+{
+	struct nandfs_vinfo *records;
+	size_t bytes;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_VINFO_MAX)
+		return (EINVAL);
+
+	bytes = sizeof(struct nandfs_vinfo) * nargv->nv_nmembs;
+	records = malloc(bytes, M_NANDFSTEMP, M_WAITOK|M_ZERO);
+
+	error = copyin((void *)(uintptr_t)nargv->nv_base, records, bytes);
+	if (error != 0)
+		goto out;
+
+	error = nandfs_get_dat_vinfo(nandfsdev, records, nargv->nv_nmembs);
+	if (error == 0)
+		error = copyout(records, (void *)(uintptr_t)nargv->nv_base,
+		    bytes);
+out:
+	free(records, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * For each request in 'vinfo', look up the DAT entry of the requested
+ * virtual block and fill in its lifetime ([start, end] checkpoints) and
+ * physical block number.  Stops at the first lookup failure.
+ */
+int
+nandfs_get_dat_vinfo(struct nandfs_device *nandfsdev, struct nandfs_vinfo *vinfo,
+    uint32_t nmembs)
+{
+	struct nandfs_node *dat_node;
+	struct nandfs_mdt *dat_mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *entries;
+	uint32_t n, slot;
+	int error;
+
+	error = 0;
+	dat_node = nandfsdev->nd_dat_node;
+	dat_mdt = &nandfsdev->nd_dat_mdt;
+
+	DPRINTF(DAT, ("%s: nmembs %#x\n", __func__, nmembs));
+
+	VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);
+
+	for (n = 0; n < nmembs; n++) {
+		req.entrynum = vinfo[n].nvi_vblocknr;
+
+		error = nandfs_get_entry_block(dat_mdt, dat_node, &req,
+		    &slot, 0);
+		if (error != 0)
+			break;
+
+		entries = ((struct nandfs_dat_entry *) req.bp_entry->b_data);
+		vinfo[n].nvi_start = entries[slot].de_start;
+		vinfo[n].nvi_end = entries[slot].de_end;
+		vinfo[n].nvi_blocknr = entries[slot].de_blocknr;
+
+		DPRINTF(DAT, ("%s: vinfo: %jx[%jx-%jx]->%jx\n",
+		    __func__, vinfo[n].nvi_vblocknr, vinfo[n].nvi_start,
+		    vinfo[n].nvi_end, vinfo[n].nvi_blocknr));
+
+		brelse(req.bp_entry);
+	}
+
+	VOP_UNLOCK(NTOV(dat_node), 0);
+	return (error);
+}
+
+/*
+ * Ioctl backend: copy in an array of block descriptors, resolve their
+ * current physical block numbers via the DAT, and copy them back out.
+ */
+int
+nandfs_get_dat_bdescs_ioctl(struct nandfs_device *nffsdev,
+    struct nandfs_argv *nargv)
+{
+	struct nandfs_bdesc *descs;
+	size_t bytes;
+	int error;
+
+	bytes = nargv->nv_nmembs * sizeof(struct nandfs_bdesc);
+	descs = malloc(bytes, M_NANDFSTEMP, M_WAITOK);
+
+	error = copyin((void *)(uintptr_t)nargv->nv_base, descs, bytes);
+	if (error != 0)
+		goto out;
+
+	error = nandfs_get_dat_bdescs(nffsdev, descs, nargv->nv_nmembs);
+	if (error == 0)
+		error = copyout(descs, (void *)(uintptr_t)nargv->nv_base,
+		    bytes);
+out:
+	free(descs, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * For each block descriptor, look up the DAT block currently backing
+ * bd_offset and store it in bd_blocknr.  Stops at the first lookup
+ * failure.
+ */
+int
+nandfs_get_dat_bdescs(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+    uint32_t nmembs)
+{
+	struct nandfs_node *dat_node;
+	uint64_t pblk;
+	uint32_t n;
+	int error;
+
+	error = 0;
+	dat_node = nffsdev->nd_dat_node;
+
+	VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);
+
+	for (n = 0; n < nmembs; n++) {
+		DPRINTF(CLEAN,
+		    ("%s: bd ino:%#jx oblk:%#jx blocknr:%#jx off:%#jx\n",
+		    __func__, (uintmax_t)bd[n].bd_ino,
+		    (uintmax_t)bd[n].bd_oblocknr, (uintmax_t)bd[n].bd_blocknr,
+		    (uintmax_t)bd[n].bd_offset));
+
+		error = nandfs_bmap_lookup(dat_node, bd[n].bd_offset, &pblk);
+		if (error != 0)
+			break;
+		bd[n].bd_blocknr = pblk;
+	}
+
+	VOP_UNLOCK(NTOV(dat_node), 0);
+	return (error);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_dir.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_dir.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,314 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_dir.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+int
+nandfs_add_dirent(struct vnode *dvp, uint64_t ino, char *nameptr, long namelen,
+ uint8_t type)
+{
+ struct nandfs_node *dir_node = VTON(dvp);
+ struct nandfs_dir_entry *dirent, *pdirent;
+ uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+ uint64_t filesize = dir_node->nn_inode.i_size;
+ uint64_t inode_blks = dir_node->nn_inode.i_blocks;
+ uint32_t off, rest;
+ uint8_t *pos;
+ struct buf *bp;
+ int error;
+
+ pdirent = NULL;
+ bp = NULL;
+ if (inode_blks) {
+ error = nandfs_bread(dir_node, inode_blks - 1, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ pos = bp->b_data;
+ off = 0;
+ while (off < blocksize) {
+ pdirent = (struct nandfs_dir_entry *) (pos + off);
+ if (!pdirent->rec_len) {
+ pdirent = NULL;
+ break;
+ }
+ off += pdirent->rec_len;
+ }
+
+ if (pdirent)
+ rest = pdirent->rec_len -
+ NANDFS_DIR_REC_LEN(pdirent->name_len);
+ else
+ rest = blocksize;
+
+ if (rest < NANDFS_DIR_REC_LEN(namelen)) {
+ /* Do not update pdirent as new block is created */
+ pdirent = NULL;
+ brelse(bp);
+ /* Set to NULL to create new */
+ bp = NULL;
+ filesize += rest;
+ }
+ }
+
+ /* If no bp found create new */
+ if (!bp) {
+ error = nandfs_bcreate(dir_node, inode_blks, NOCRED, 0, &bp);
+ if (error)
+ return (error);
+ off = 0;
+ pos = bp->b_data;
+ }
+
+ /* Modify pdirent if exists */
+ if (pdirent) {
+ DPRINTF(LOOKUP, ("modify pdirent %p\n", pdirent));
+ /* modify last de */
+ off -= pdirent->rec_len;
+ pdirent->rec_len =
+ NANDFS_DIR_REC_LEN(pdirent->name_len);
+ off += pdirent->rec_len;
+ }
+
+ /* Create new dirent */
+ dirent = (struct nandfs_dir_entry *) (pos + off);
+ dirent->rec_len = blocksize - off;
+ dirent->inode = ino;
+ dirent->name_len = namelen;
+ memset(dirent->name, 0, NANDFS_DIR_NAME_LEN(namelen));
+ memcpy(dirent->name, nameptr, namelen);
+ dirent->file_type = type;
+
+ filesize += NANDFS_DIR_REC_LEN(dirent->name_len);
+
+ DPRINTF(LOOKUP, ("create dir_entry '%.*s' at %p with size %x "
+ "new filesize: %jx\n",
+ (int)namelen, dirent->name, dirent, dirent->rec_len,
+ (uintmax_t)filesize));
+
+ error = nandfs_dirty_buf(bp, 0);
+ if (error)
+ return (error);
+
+ dir_node->nn_inode.i_size = filesize;
+ dir_node->nn_flags |= IN_CHANGE | IN_UPDATE;
+ vnode_pager_setsize(dvp, filesize);
+
+ return (0);
+}
+
+int
+nandfs_remove_dirent(struct vnode *dvp, struct nandfs_node *node,
+ struct componentname *cnp)
+{
+ struct nandfs_node *dir_node;
+ struct nandfs_dir_entry *dirent, *pdirent;
+ struct buf *bp;
+ uint64_t filesize, blocknr, ino, offset;
+ uint32_t blocksize, limit, off;
+ uint16_t newsize;
+ uint8_t *pos;
+ int error, found;
+
+ dir_node = VTON(dvp);
+ filesize = dir_node->nn_inode.i_size;
+ if (!filesize)
+ return (0);
+
+ if (node) {
+ offset = node->nn_diroff;
+ ino = node->nn_ino;
+ } else {
+ offset = dir_node->nn_diroff;
+ ino = NANDFS_WHT_INO;
+ }
+
+ dirent = pdirent = NULL;
+ blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+ blocknr = offset / blocksize;
+
+ DPRINTF(LOOKUP, ("rm direntry dvp %p node %p ino %#jx at off %#jx\n",
+ dvp, node, (uintmax_t)ino, (uintmax_t)offset));
+
+ error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ pos = bp->b_data;
+ off = 0;
+ found = 0;
+ limit = offset % blocksize;
+ pdirent = (struct nandfs_dir_entry *) bp->b_data;
+ while (off <= limit) {
+ dirent = (struct nandfs_dir_entry *) (pos + off);
+
+ if ((off == limit) &&
+ (dirent->inode == ino)) {
+ found = 1;
+ break;
+ }
+ if (dirent->inode != 0)
+ pdirent = dirent;
+ off += dirent->rec_len;
+ }
+
+ if (!found) {
+ nandfs_error("cannot find entry to remove");
+ brelse(bp);
+ return (error);
+ }
+ DPRINTF(LOOKUP,
+ ("rm dirent ino %#jx at %#x with size %#x\n",
+ (uintmax_t)dirent->inode, off, dirent->rec_len));
+
+ newsize = (uintptr_t)dirent - (uintptr_t)pdirent;
+ newsize += dirent->rec_len;
+ pdirent->rec_len = newsize;
+ dirent->inode = 0;
+ error = nandfs_dirty_buf(bp, 0);
+ if (error)
+ return (error);
+
+ dir_node->nn_flags |= IN_CHANGE | IN_UPDATE;
+ /* If last one modify filesize */
+ if ((offset + NANDFS_DIR_REC_LEN(dirent->name_len)) == filesize) {
+ filesize = blocknr * blocksize +
+ ((uintptr_t)pdirent - (uintptr_t)pos) +
+ NANDFS_DIR_REC_LEN(pdirent->name_len);
+ dir_node->nn_inode.i_size = filesize;
+ }
+
+ return (0);
+}
+
+int
+nandfs_update_parent_dir(struct vnode *dvp, uint64_t newparent)
+{
+ struct nandfs_dir_entry *dirent;
+ struct nandfs_node *dir_node;
+ struct buf *bp;
+ int error;
+
+ dir_node = VTON(dvp);
+ error = nandfs_bread(dir_node, 0, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ dirent = (struct nandfs_dir_entry *)bp->b_data;
+ dirent->inode = newparent;
+ error = nandfs_dirty_buf(bp, 0);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+int
+nandfs_update_dirent(struct vnode *dvp, struct nandfs_node *fnode,
+ struct nandfs_node *tnode)
+{
+ struct nandfs_node *dir_node;
+ struct nandfs_dir_entry *dirent;
+ struct buf *bp;
+ uint64_t file_size, blocknr;
+ uint32_t blocksize, off;
+ uint8_t *pos;
+ int error;
+
+ dir_node = VTON(dvp);
+ file_size = dir_node->nn_inode.i_size;
+ if (!file_size)
+ return (0);
+
+ DPRINTF(LOOKUP,
+ ("chg direntry dvp %p ino %#jx to in %#jx at off %#jx\n",
+ dvp, (uintmax_t)tnode->nn_ino, (uintmax_t)fnode->nn_ino,
+ (uintmax_t)tnode->nn_diroff));
+
+ blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+ blocknr = tnode->nn_diroff / blocksize;
+ off = tnode->nn_diroff % blocksize;
+ error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ pos = bp->b_data;
+ dirent = (struct nandfs_dir_entry *) (pos + off);
+ KASSERT((dirent->inode == tnode->nn_ino),
+ ("direntry mismatch"));
+
+ dirent->inode = fnode->nn_ino;
+ error = nandfs_dirty_buf(bp, 0);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+int
+nandfs_init_dir(struct vnode *dvp, uint64_t ino, uint64_t parent_ino)
+{
+
+ if (nandfs_add_dirent(dvp, parent_ino, "..", 2, DT_DIR) ||
+ nandfs_add_dirent(dvp, ino, ".", 1, DT_DIR)) {
+ nandfs_error("%s: cannot initialize dir ino:%jd(pino:%jd)\n",
+ __func__, ino, parent_ino);
+ return (-1);
+ }
+ return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_fs.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_fs.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Original definitions written by Koji Sato <koji at osrg.net>
+ * and Ryusuke Konishi <ryusuke at osrg.net>
+ * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD: head/sys/fs/nandfs/nandfs_fs.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDFS_FS_H
+#define _NANDFS_FS_H
+
+#include <sys/uuid.h>
+
+#define MNINDIR(fsdev) ((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t))
+
+/*
+ * Inode structure. There are a few dedicated inode numbers that are
+ * defined here first.
+ */
+#define NANDFS_WHT_INO 1 /* Whiteout ino */
+#define NANDFS_ROOT_INO 2 /* Root file inode */
+#define NANDFS_DAT_INO 3 /* DAT file */
+#define NANDFS_CPFILE_INO 4 /* checkpoint file */
+#define NANDFS_SUFILE_INO 5 /* segment usage file */
+#define NANDFS_IFILE_INO 6 /* ifile */
+#define NANDFS_GC_INO 7 /* Cleanerd node */
+#define NANDFS_ATIME_INO 8 /* Atime file (reserved) */
+#define NANDFS_XATTR_INO 9 /* Xattribute file (reserved) */
+#define NANDFS_SKETCH_INO 10 /* Sketch file (obsolete) */
+#define NANDFS_USER_INO 11 /* First user's file inode number */
+
+#define NANDFS_SYS_NODE(ino) \
+ (((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO))
+
+#define NDADDR 12 /* Direct addresses in inode. */
+#define NIADDR 3 /* Indirect addresses in inode. */
+
+typedef int64_t nandfs_daddr_t;
+typedef int64_t nandfs_lbn_t;
+
+struct nandfs_inode {
+ uint64_t i_blocks; /* 0: size in device blocks */
+ uint64_t i_size; /* 8: size in bytes */
+ uint64_t i_ctime; /* 16: creation time in seconds */
+ uint64_t i_mtime; /* 24: modification time in seconds part*/
+ uint32_t i_ctime_nsec; /* 32: creation time nanoseconds part */
+ uint32_t i_mtime_nsec; /* 36: modification time in nanoseconds */
+ uint32_t i_uid; /* 40: user id */
+ uint32_t i_gid; /* 44: group id */
+ uint16_t i_mode; /* 48: file mode */
+ uint16_t i_links_count; /* 50: number of references to the inode*/
+ uint32_t i_flags; /* 52: NANDFS_*_FL flags */
+ nandfs_daddr_t i_special; /* 56: special */
+ nandfs_daddr_t i_db[NDADDR]; /* 64: Direct disk blocks. */
+ nandfs_daddr_t i_ib[NIADDR]; /* 160: Indirect disk blocks. */
+ uint64_t i_xattr; /* 184: reserved for extended attributes*/
+ uint32_t i_generation; /* 192: file generation for NFS */
+ uint32_t i_pad[15]; /* 196: make it 64 bits aligned */
+};
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_inode) == 256);
+#endif
+
+/*
+ * Each checkpoint/snapshot has a super root.
+ *
+ * The super root holds the inodes of the three system files: `dat', `cp' and
+ * 'su' files. All other FS state is defined by those.
+ *
+ * It is CRC checksum'ed and time stamped.
+ */
+
+struct nandfs_super_root {
+ uint32_t sr_sum; /* check-sum */
+ uint16_t sr_bytes; /* byte count of this structure */
+ uint16_t sr_flags; /* reserved for flags */
+ uint64_t sr_nongc_ctime; /* timestamp, not for cleaner(?) */
+ struct nandfs_inode sr_dat; /* DAT, virt->phys translation inode */
+ struct nandfs_inode sr_cpfile; /* CP, checkpoints inode */
+ struct nandfs_inode sr_sufile; /* SU, segment usage inode */
+};
+
+#define NANDFS_SR_MDT_OFFSET(inode_size, i) \
+ ((uint32_t)&((struct nandfs_super_root *)0)->sr_dat + \
+ (inode_size) * (i))
+
+#define NANDFS_SR_DAT_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 0)
+#define NANDFS_SR_CPFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 1)
+#define NANDFS_SR_SUFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 2)
+#define NANDFS_SR_BYTES (sizeof(struct nandfs_super_root))
+
+/*
+ * The superblock describes the basic structure and mount history. It also
+ * records some sizes of structures found on the disc for sanity checks.
+ *
+ * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and
+ * NANDFS_SB2_OFFSET_BYTES.
+ */
+
+/* File system states stored on media in superblock's sbp->s_state */
+#define NANDFS_VALID_FS 0x0001 /* cleanly unmounted and all is ok */
+#define NANDFS_ERROR_FS 0x0002 /* there were errors detected, fsck */
+#define NANDFS_RESIZE_FS 0x0004 /* resize required, XXX unknown flag*/
+#define NANDFS_MOUNT_STATE_BITS "\20\1VALID_FS\2ERROR_FS\3RESIZE_FS"
+
+/*
+ * Brief description of control structures:
+ *
+ * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks.
+ * Simple round-robin policy is used in order to choose which block will
+ * contain new super block.
+ *
+ * Simple case with 2 blocks:
+ * 1: fsdata sblock1 [sblock3 [sblock5 ..]]
+ * 2: fsdata sblock2 [sblock4 [sblock6 ..]]
+ */
+struct nandfs_fsdata {
+ uint16_t f_magic;
+ uint16_t f_bytes;
+
+ uint32_t f_sum; /* checksum of fsdata */
+ uint32_t f_rev_level; /* major disk format revision */
+
+ uint64_t f_ctime; /* creation time (execution time
+ of newfs) */
+ /* Block size represented as: blocksize = 1 << (f_log_block_size + 10) */
+ uint32_t f_log_block_size;
+
+ uint16_t f_inode_size; /* size of an inode */
+ uint16_t f_dat_entry_size; /* size of a dat entry */
+ uint16_t f_checkpoint_size; /* size of a checkpoint */
+ uint16_t f_segment_usage_size; /* size of a segment usage */
+
+ uint16_t f_sbbytes; /* byte count of CRC calculation
+ for super blocks. s_reserved
+ is excluded! */
+
+ uint16_t f_errors; /* behaviour on detecting errors */
+
+ uint32_t f_erasesize;
+ uint64_t f_nsegments; /* number of segm. in filesystem */
+ nandfs_daddr_t f_first_data_block; /* 1st seg disk block number */
+ uint32_t f_blocks_per_segment; /* number of blocks per segment */
+ uint32_t f_r_segments_percentage; /* reserved segments percentage */
+
+ struct uuid f_uuid; /* 128-bit uuid for volume */
+ char f_volume_name[16]; /* volume name */
+ uint32_t f_pad[104];
+} __packed;
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_fsdata) == 512);
+#endif
+
+struct nandfs_super_block {
+ uint16_t s_magic; /* magic value for identification */
+
+ uint32_t s_sum; /* check sum of super block */
+
+ uint64_t s_last_cno; /* last checkpoint number */
+ uint64_t s_last_pseg; /* addr part. segm. written last */
+ uint64_t s_last_seq; /* seq.number of seg written last */
+ uint64_t s_free_blocks_count; /* free blocks count */
+
+ uint64_t s_mtime; /* mount time */
+ uint64_t s_wtime; /* write time */
+ uint16_t s_state; /* file system state */
+
+ char s_last_mounted[64]; /* directory where last mounted */
+
+ uint32_t s_c_interval; /* commit interval of segment */
+ uint32_t s_c_block_max; /* threshold of data amount for
+ the segment construction */
+ uint32_t s_reserved[32]; /* padding to end of the block */
+} __packed;
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_super_block) == 256);
+#endif
+
+#define NANDFS_FSDATA_MAGIC 0xf8da
+#define NANDFS_SUPER_MAGIC 0x8008
+
+#define NANDFS_NFSAREAS 4
+#define NANDFS_DATA_OFFSET_BYTES(esize) (NANDFS_NFSAREAS * (esize))
+
+#define NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata))
+
+#define NANDFS_DEF_BLOCKSIZE 4096
+#define NANDFS_MIN_BLOCKSIZE 512
+
+#define NANDFS_DEF_ERASESIZE (2 << 16)
+
+#define NANDFS_MIN_SEGSIZE NANDFS_DEF_ERASESIZE
+
+#define NANDFS_CURRENT_REV 9 /* current major revision */
+
+#define NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad)
+/* Bytes count of super_block for CRC-calculation */
+#define NANDFS_SB_BYTES offsetof(struct nandfs_super_block, s_reserved)
+
+/* Maximal count of links to a file */
+#define NANDFS_LINK_MAX 32000
+
+/*
+ * Structure of a directory entry.
+ *
+ * Note that they can't span blocks; the rec_len fills out.
+ */
+
+#define NANDFS_NAME_LEN 255
+struct nandfs_dir_entry {
+ uint64_t inode; /* inode number */
+ uint16_t rec_len; /* directory entry length */
+ uint8_t name_len; /* name length */
+ uint8_t file_type;
+ char name[NANDFS_NAME_LEN]; /* file name */
+ char pad;
+};
+
+/*
+ * NANDFS_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 8
+ */
+#define NANDFS_DIR_PAD 8
+#define NANDFS_DIR_ROUND (NANDFS_DIR_PAD - 1)
+#define NANDFS_DIR_NAME_OFFSET (offsetof(struct nandfs_dir_entry, name))
+#define NANDFS_DIR_REC_LEN(name_len) \
+ (((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND) \
+ & ~NANDFS_DIR_ROUND)
+#define NANDFS_DIR_NAME_LEN(name_len) \
+ (NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET)
+
+/*
+ * NiLFS/NANDFS divides the disc into fixed length segments. Each segment is
+ * filled with one or more partial segments of variable lengths.
+ *
+ * Each partial segment has a segment summary header followed by updates of
+ * files and optionally a super root.
+ */
+
+/*
+ * Virtual to physical block translation information. For data blocks it maps
+ * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non
+ * datablocks it is the virtual block number assigned to an indirect block
+ * and has no bi_blkoff. The physical block number is the next
+ * available data block in the partial segment after all the binfo's.
+ */
+struct nandfs_binfo_v {
+ uint64_t bi_ino; /* file's inode */
+ uint64_t bi_vblocknr; /* assigned virtual block number */
+ uint64_t bi_blkoff; /* for file's logical block number */
+};
+
+/*
+ * DAT allocation. For data blocks just the logical block number that maps on
+ * the next available data block in the partial segment after the binfo's.
+ */
+struct nandfs_binfo_dat {
+ uint64_t bi_ino;
+ uint64_t bi_blkoff; /* DAT file's logical block number */
+ uint8_t bi_level; /* whether this is meta block */
+ uint8_t bi_pad[7];
+};
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat));
+#endif
+
+/* Convenience union for both types of binfo's */
+union nandfs_binfo {
+ struct nandfs_binfo_v bi_v;
+ struct nandfs_binfo_dat bi_dat;
+};
+
+/* Indirect buffers path */
+struct nandfs_indir {
+ nandfs_daddr_t in_lbn;
+ int in_off;
+};
+
+/* The (partial) segment summary */
+struct nandfs_segment_summary {
+ uint32_t ss_datasum; /* CRC of complete data block */
+ uint32_t ss_sumsum; /* CRC of segment summary only */
+ uint32_t ss_magic; /* magic to identify segment summary */
+ uint16_t ss_bytes; /* size of segment summary structure */
+ uint16_t ss_flags; /* NANDFS_SS_* flags */
+ uint64_t ss_seq; /* sequence number of this segm. sum */
+ uint64_t ss_create; /* creation timestamp in seconds */
+ uint64_t ss_next; /* blocknumber of next segment */
+ uint32_t ss_nblocks; /* number of blocks used by summary */
+ uint32_t ss_nbinfos; /* number of binfo structures */
+ uint32_t ss_sumbytes; /* total size of segment summary */
+ uint32_t ss_pad;
+ /* stream of binfo structures */
+};
+
+#define NANDFS_SEGSUM_MAGIC 0x8e680011 /* segment summary magic number */
+
+/* Segment summary flags */
+#define NANDFS_SS_LOGBGN 0x0001 /* begins a logical segment */
+#define NANDFS_SS_LOGEND 0x0002 /* ends a logical segment */
+#define NANDFS_SS_SR 0x0004 /* has super root */
+#define NANDFS_SS_SYNDT 0x0008 /* includes data only updates */
+#define NANDFS_SS_GC 0x0010 /* segment written for cleaner operation */
+#define NANDFS_SS_FLAG_BITS "\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC"
+
+/* Segment summary constraints */
+#define NANDFS_SEG_MIN_BLOCKS 16 /* minimum number of blocks in a
+ full segment */
+#define NANDFS_PSEG_MIN_BLOCKS 2 /* minimum number of blocks in a
+ partial segment */
+#define NANDFS_MIN_NRSVSEGS 8 /* minimum number of reserved
+ segments */
+
+/*
+ * Structure of DAT/inode file.
+ *
+ * A DAT file is divided into groups. The maximum number of groups is the
+ * number of block group descriptors that fit into one block; this descriptor
+ * only gives the number of free entries in the associated group.
+ *
+ * Each group has a block sized bitmap indicating if an entry is taken or
+ * empty. Each bit stands for a DAT entry.
+ *
+ * The inode file has exactly the same format only the entries are inode
+ * entries.
+ */
+
+struct nandfs_block_group_desc {
+ uint32_t bg_nfrees; /* num. free entries in block group */
+};
+
+/* DAT entry in a super root's DAT file */
+struct nandfs_dat_entry {
+ uint64_t de_blocknr; /* block number */
+ uint64_t de_start; /* valid from checkpoint */
+ uint64_t de_end; /* valid till checkpoint */
+ uint64_t de_rsv; /* reserved for future use */
+};
+
+/*
+ * Structure of CP file.
+ *
+ * A snapshot is just a checkpoint only it's protected against removal by the
+ * cleaner. The snapshots are kept on a double linked list of checkpoints.
+ */
+struct nandfs_snapshot_list {
+ uint64_t ssl_next; /* checkpoint nr. forward */
+ uint64_t ssl_prev; /* checkpoint nr. back */
+};
+
+/* Checkpoint entry structure */
+struct nandfs_checkpoint {
+ uint32_t cp_flags; /* NANDFS_CHECKPOINT_* flags */
+ uint32_t cp_checkpoints_count; /* ZERO, not used anymore? */
+ struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots */
+ uint64_t cp_cno; /* checkpoint number */
+ uint64_t cp_create; /* creation timestamp */
+ uint64_t cp_nblk_inc; /* number of blocks incremented */
+ uint64_t cp_blocks_count; /* reserved (might be deleted) */
+ struct nandfs_inode cp_ifile_inode; /* inode file inode */
+};
+
+/* Checkpoint flags */
+#define NANDFS_CHECKPOINT_SNAPSHOT 1
+#define NANDFS_CHECKPOINT_INVALID 2
+#define NANDFS_CHECKPOINT_SKETCH 4
+#define NANDFS_CHECKPOINT_MINOR 8
+#define NANDFS_CHECKPOINT_BITS "\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR"
+
+/* Header of the checkpoint file */
+struct nandfs_cpfile_header {
+ uint64_t ch_ncheckpoints; /* number of checkpoints */
+ uint64_t ch_nsnapshots; /* number of snapshots */
+ struct nandfs_snapshot_list ch_snapshot_list; /* snapshot list */
+};
+
+#define NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET \
+ ((sizeof(struct nandfs_cpfile_header) + \
+ sizeof(struct nandfs_checkpoint) - 1) / \
+ sizeof(struct nandfs_checkpoint))
+
+
+#define NANDFS_NOSEGMENT 0xffffffff
+
+/*
+ * Structure of SU file.
+ *
+ * The segment usage file sums up how each of the segments are used. They are
+ * indexed by their segment number.
+ */
+
+/* Segment usage entry */
+struct nandfs_segment_usage {
+ uint64_t su_lastmod; /* last modified timestamp */
+ uint32_t su_nblocks; /* number of blocks in segment */
+ uint32_t su_flags; /* NANDFS_SEGMENT_USAGE_* flags */
+};
+
+/* Segment usage flag */
+#define NANDFS_SEGMENT_USAGE_ACTIVE 1
+#define NANDFS_SEGMENT_USAGE_DIRTY 2
+#define NANDFS_SEGMENT_USAGE_ERROR 4
+#define NANDFS_SEGMENT_USAGE_GC 8
+#define NANDFS_SEGMENT_USAGE_BITS "\20\1ACTIVE\2DIRTY\3ERROR"
+
+/* Header of the segment usage file */
+struct nandfs_sufile_header {
+ uint64_t sh_ncleansegs; /* number of segments marked clean */
+ uint64_t sh_ndirtysegs; /* number of segments marked dirty */
+ uint64_t sh_last_alloc; /* last allocated segment number */
+};
+
+#define NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \
+ ((sizeof(struct nandfs_sufile_header) + \
+ sizeof(struct nandfs_segment_usage) - 1) / \
+ sizeof(struct nandfs_segment_usage))
+
+struct nandfs_seg_stat {
+ uint64_t nss_nsegs;
+ uint64_t nss_ncleansegs;
+ uint64_t nss_ndirtysegs;
+ uint64_t nss_ctime;
+ uint64_t nss_nongc_ctime;
+ uint64_t nss_prot_seq;
+};
+
+enum {
+ NANDFS_CHECKPOINT,
+ NANDFS_SNAPSHOT
+};
+
+#define NANDFS_CPINFO_MAX 512
+
+struct nandfs_cpinfo {
+ uint32_t nci_flags;
+ uint32_t nci_pad;
+ uint64_t nci_cno;
+ uint64_t nci_create;
+ uint64_t nci_nblk_inc;
+ uint64_t nci_blocks_count;
+ uint64_t nci_next;
+};
+
+#define NANDFS_SEGMENTS_MAX 512
+
+struct nandfs_suinfo {
+ uint64_t nsi_num;
+ uint64_t nsi_lastmod;
+ uint32_t nsi_blocks;
+ uint32_t nsi_flags;
+};
+
+#define NANDFS_VINFO_MAX 512
+
+struct nandfs_vinfo {
+ uint64_t nvi_ino;
+ uint64_t nvi_vblocknr;
+ uint64_t nvi_start;
+ uint64_t nvi_end;
+ uint64_t nvi_blocknr;
+ int nvi_alive;
+};
+
+struct nandfs_cpmode {
+ uint64_t ncpm_cno;
+ uint32_t ncpm_mode;
+ uint32_t ncpm_pad;
+};
+
+struct nandfs_argv {
+ uint64_t nv_base;
+ uint32_t nv_nmembs;
+ uint16_t nv_size;
+ uint16_t nv_flags;
+ uint64_t nv_index;
+};
+
+struct nandfs_cpstat {
+ uint64_t ncp_cno;
+ uint64_t ncp_ncps;
+ uint64_t ncp_nss;
+};
+
+struct nandfs_period {
+ uint64_t p_start;
+ uint64_t p_end;
+};
+
+struct nandfs_vdesc {
+ uint64_t vd_ino;
+ uint64_t vd_cno;
+ uint64_t vd_vblocknr;
+ struct nandfs_period vd_period;
+ uint64_t vd_blocknr;
+ uint64_t vd_offset;
+ uint32_t vd_flags;
+ uint32_t vd_pad;
+};
+
+struct nandfs_bdesc {
+ uint64_t bd_ino;
+ uint64_t bd_oblocknr;
+ uint64_t bd_blocknr;
+ uint64_t bd_offset;
+ uint32_t bd_level;
+ uint32_t bd_alive;
+};
+
+#ifndef _KERNEL
+#ifndef MNAMELEN
+#define MNAMELEN 88
+#endif
+#endif
+
+struct nandfs_fsinfo {
+ struct nandfs_fsdata fs_fsdata;
+ struct nandfs_super_block fs_super;
+ char fs_dev[MNAMELEN];
+};
+
+#define NANDFS_MAX_MOUNTS 65535
+
+#define NANDFS_IOCTL_GET_SUSTAT _IOR('N', 100, struct nandfs_seg_stat)
+#define NANDFS_IOCTL_CHANGE_CPMODE _IOWR('N', 101, struct nandfs_cpmode)
+#define NANDFS_IOCTL_GET_CPINFO _IOWR('N', 102, struct nandfs_argv)
+#define NANDFS_IOCTL_DELETE_CP _IOWR('N', 103, uint64_t[2])
+#define NANDFS_IOCTL_GET_CPSTAT _IOR('N', 104, struct nandfs_cpstat)
+#define NANDFS_IOCTL_GET_SUINFO _IOWR('N', 105, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_VINFO _IOWR('N', 106, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_BDESCS _IOWR('N', 107, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_FSINFO _IOR('N', 108, struct nandfs_fsinfo)
+#define NANDFS_IOCTL_MAKE_SNAP _IOWR('N', 109, uint64_t)
+#define NANDFS_IOCTL_DELETE_SNAP _IOWR('N', 110, uint64_t)
+#define NANDFS_IOCTL_SYNC _IOWR('N', 111, uint64_t)
+
+#endif /* _NANDFS_FS_H */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_ifile.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_ifile.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_ifile.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+int
+nandfs_node_create(struct nandfsmount *nmp, struct nandfs_node **node,
+ uint16_t mode)
+{
+ struct nandfs_alloc_request req;
+ struct nandfs_device *nandfsdev;
+ struct nandfs_mdt *mdt;
+ struct nandfs_node *ifile;
+ struct nandfs_inode *inode;
+ struct vnode *vp;
+ uint32_t entry;
+ int error = 0;
+
+ nandfsdev = nmp->nm_nandfsdev;
+ mdt = &nandfsdev->nd_ifile_mdt;
+ ifile = nmp->nm_ifile_node;
+ vp = NTOV(ifile);
+
+ VOP_LOCK(vp, LK_EXCLUSIVE);
+ /* Allocate new inode in ifile */
+ req.entrynum = nandfsdev->nd_last_ino + 1;
+ error = nandfs_find_free_entry(mdt, ifile, &req);
+ if (error) {
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ error = nandfs_get_entry_block(mdt, ifile, &req, &entry, 1);
+ if (error) {
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ /* Inode initialization */
+ inode = ((struct nandfs_inode *) req.bp_entry->b_data) + entry;
+ nandfs_inode_init(inode, mode);
+
+ error = nandfs_alloc_entry(mdt, &req);
+ if (error) {
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ VOP_UNLOCK(vp, 0);
+
+ nandfsdev->nd_last_ino = req.entrynum;
+ error = nandfs_get_node(nmp, req.entrynum, node);
+ DPRINTF(IFILE, ("%s: node: %p ino: %#jx\n",
+ __func__, node, (uintmax_t)((*node)->nn_ino)));
+
+ return (error);
+}
+
+int
+nandfs_node_destroy(struct nandfs_node *node)
+{
+ struct nandfs_alloc_request req;
+ struct nandfsmount *nmp;
+ struct nandfs_mdt *mdt;
+ struct nandfs_node *ifile;
+ struct vnode *vp;
+ int error = 0;
+
+ nmp = node->nn_nmp;
+ req.entrynum = node->nn_ino;
+ mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
+ ifile = nmp->nm_ifile_node;
+ vp = NTOV(ifile);
+
+ DPRINTF(IFILE, ("%s: destroy node: %p ino: %#jx\n",
+ __func__, node, (uintmax_t)node->nn_ino));
+ VOP_LOCK(vp, LK_EXCLUSIVE);
+
+ error = nandfs_find_entry(mdt, ifile, &req);
+ if (error) {
+ nandfs_error("%s: finding entry error:%d node %p(%jx)",
+ __func__, error, node, node->nn_ino);
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ nandfs_inode_destroy(&node->nn_inode);
+
+ error = nandfs_free_entry(mdt, &req);
+ if (error) {
+ nandfs_error("%s: freing entry error:%d node %p(%jx)",
+ __func__, error, node, node->nn_ino);
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ VOP_UNLOCK(vp, 0);
+ DPRINTF(IFILE, ("%s: freed node %p ino %#jx\n",
+ __func__, node, (uintmax_t)node->nn_ino));
+ return (error);
+}
+
+int
+nandfs_node_update(struct nandfs_node *node)
+{
+ struct nandfs_alloc_request req;
+ struct nandfsmount *nmp;
+ struct nandfs_mdt *mdt;
+ struct nandfs_node *ifile;
+ struct nandfs_inode *inode;
+ uint32_t index;
+ int error = 0;
+
+ nmp = node->nn_nmp;
+ ifile = nmp->nm_ifile_node;
+ ASSERT_VOP_LOCKED(NTOV(ifile), __func__);
+
+ req.entrynum = node->nn_ino;
+ mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
+
+ DPRINTF(IFILE, ("%s: node:%p ino:%#jx\n",
+ __func__, &node->nn_inode, (uintmax_t)node->nn_ino));
+
+ error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0);
+ if (error) {
+ printf("nandfs_get_entry_block returned with ERROR=%d\n",
+ error);
+ return (error);
+ }
+
+ inode = ((struct nandfs_inode *) req.bp_entry->b_data) + index;
+ memcpy(inode, &node->nn_inode, sizeof(*inode));
+ error = nandfs_dirty_buf(req.bp_entry, 0);
+
+ return (error);
+}
+
+int
+nandfs_get_node_entry(struct nandfsmount *nmp, struct nandfs_inode **inode,
+ uint64_t ino, struct buf **bp)
+{
+ struct nandfs_alloc_request req;
+ struct nandfs_mdt *mdt;
+ struct nandfs_node *ifile;
+ struct vnode *vp;
+ uint32_t index;
+ int error = 0;
+
+ req.entrynum = ino;
+ mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
+ ifile = nmp->nm_ifile_node;
+ vp = NTOV(ifile);
+
+ VOP_LOCK(vp, LK_EXCLUSIVE);
+ error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0);
+ if (error) {
+ VOP_UNLOCK(vp, 0);
+ return (error);
+ }
+
+ *inode = ((struct nandfs_inode *) req.bp_entry->b_data) + index;
+ *bp = req.bp_entry;
+ VOP_UNLOCK(vp, 0);
+ return (0);
+}
+
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_mount.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_mount.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the
+ * NetBSD Project. See http://www.NetBSD.org/ for
+ * information about NetBSD.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_mount.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD: head/sys/fs/nandfs/nandfs_mount.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _FS_NANDFS_NANDFS_MOUNT_H_
+#define _FS_NANDFS_NANDFS_MOUNT_H_
+
+/*
+ * Arguments to mount NANDFS filingsystem.
+ */
+
+struct nandfs_args {
+ char *fspec; /* mount specifier */
+ int64_t cpno; /* checkpoint number */
+};
+
+#endif /* !_FS_NANDFS_NANDFS_MOUNT_H_ */
+
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_segment.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_segment.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,1329 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_segment.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/libkern.h>
+
+#include <ddb/ddb.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+static int
+nandfs_new_segment(struct nandfs_device *fsdev)
+{
+ int error = 0;
+ uint64_t new;
+
+ error = nandfs_alloc_segment(fsdev, &new);
+ if (!error) {
+ fsdev->nd_seg_num = fsdev->nd_next_seg_num;
+ fsdev->nd_next_seg_num = new;
+ }
+ DPRINTF(SYNC, ("%s: new segment %jx next %jx error %d\n",
+ __func__, (uintmax_t)fsdev->nd_seg_num, (uintmax_t)new, error));
+ if (error)
+ nandfs_error("%s: cannot create segment error %d\n",
+ __func__, error);
+
+ return (error);
+}
+
+static int
+create_segment(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_segment *seg;
+ struct nandfs_device *fsdev;
+ struct nandfs_segment *prev;
+ struct buf *bp;
+ uint64_t start_block, curr;
+ uint32_t blks_per_seg, nblocks;
+ int error;
+
+ fsdev = seginfo->fsdev;
+ prev = seginfo->curseg;
+ blks_per_seg = fsdev->nd_fsdata.f_blocks_per_segment;
+ nblocks = fsdev->nd_last_segsum.ss_nblocks;
+
+ if (!prev) {
+ vfs_timestamp(&fsdev->nd_ts);
+ /* Touch current segment */
+ error = nandfs_touch_segment(fsdev, fsdev->nd_seg_num);
+ if (error) {
+ nandfs_error("%s: cannot preallocate segment %jx\n",
+ __func__, fsdev->nd_seg_num);
+ return (error);
+ }
+ error = nandfs_touch_segment(fsdev, 0);
+ if (error) {
+ nandfs_error("%s: cannot dirty block with segment 0\n",
+ __func__);
+ return (error);
+ }
+ start_block = fsdev->nd_last_pseg + (uint64_t)nblocks;
+ /*
+ * XXX Hack
+ */
+ if (blks_per_seg - (start_block % blks_per_seg) - 1 == 0)
+ start_block++;
+ curr = nandfs_get_segnum_of_block(fsdev, start_block);
+ /* Allocate new segment if last one is full */
+ if (fsdev->nd_seg_num != curr) {
+ error = nandfs_new_segment(fsdev);
+ if (error) {
+ nandfs_error("%s: cannot create new segment\n",
+ __func__);
+ return (error);
+ }
+ /*
+ * XXX Hack
+ */
+ nandfs_get_segment_range(fsdev, fsdev->nd_seg_num, &start_block, NULL);
+ }
+ } else {
+ nandfs_get_segment_range(fsdev, fsdev->nd_next_seg_num,
+ &start_block, NULL);
+
+ /* Touch current segment and allocate and touch new one */
+ error = nandfs_new_segment(fsdev);
+ if (error) {
+ nandfs_error("%s: cannot create next segment\n",
+ __func__);
+ return (error);
+ }
+
+ /* Reiterate in case new buf is dirty */
+ seginfo->reiterate = 1;
+ }
+
+ /* Allocate and initialize nandfs_segment structure */
+ seg = malloc(sizeof(*seg), M_DEVBUF, M_WAITOK|M_ZERO);
+ TAILQ_INIT(&seg->segsum);
+ TAILQ_INIT(&seg->data);
+ seg->fsdev = fsdev;
+ seg->start_block = start_block;
+ seg->num_blocks = blks_per_seg - (start_block % blks_per_seg) - 1;
+ seg->seg_num = fsdev->nd_seg_num;
+ seg->seg_next = fsdev->nd_next_seg_num;
+ seg->segsum_blocks = 1;
+ seg->bytes_left = fsdev->nd_blocksize -
+ sizeof(struct nandfs_segment_summary);
+ seg->segsum_bytes = sizeof(struct nandfs_segment_summary);
+
+ /* Allocate buffer for segment summary */
+ bp = getblk(fsdev->nd_devvp, nandfs_block_to_dblock(fsdev,
+ seg->start_block), fsdev->nd_blocksize, 0, 0, 0);
+ bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+ bp->b_flags |= B_MANAGED;
+
+ /* Add buffer to segment */
+ TAILQ_INSERT_TAIL(&seg->segsum, bp, b_cluster.cluster_entry);
+ seg->current_off = bp->b_data + sizeof(struct nandfs_segment_summary);
+
+ DPRINTF(SYNC, ("%s: seg %p : initial settings: start %#jx size :%#x\n",
+ __func__, seg, (uintmax_t)seg->start_block, seg->num_blocks));
+ DPRINTF(SYNC, ("%s: seg->seg_num %#jx cno %#jx next %#jx\n", __func__,
+ (uintmax_t)seg->seg_num, (uintmax_t)(fsdev->nd_last_cno + 1),
+ (uintmax_t)seg->seg_next));
+
+ if (!prev)
+ LIST_INSERT_HEAD(&seginfo->seg_list, seg, seg_link);
+ else
+ LIST_INSERT_AFTER(prev, seg, seg_link);
+
+ seginfo->curseg = seg;
+
+ return (0);
+}
+
+static int
+delete_segment(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_segment *seg, *tseg;
+ struct buf *bp, *tbp;
+
+ LIST_FOREACH_SAFE(seg, &seginfo->seg_list, seg_link, tseg) {
+ TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry,
+ tbp) {
+ TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
+ bp->b_flags &= ~B_MANAGED;
+ brelse(bp);
+ };
+
+ LIST_REMOVE(seg, seg_link);
+ free(seg, M_DEVBUF);
+ }
+
+ return (0);
+}
+
+static int
+create_seginfo(struct nandfs_device *fsdev, struct nandfs_seginfo **seginfo)
+{
+ struct nandfs_seginfo *info;
+
+ info = malloc(sizeof(*info), M_DEVBUF, M_WAITOK);
+
+ LIST_INIT(&info->seg_list);
+ info->fsdev = fsdev;
+ info->curseg = NULL;
+ info->blocks = 0;
+ *seginfo = info;
+ fsdev->nd_seginfo = info;
+ return (0);
+}
+
+static int
+delete_seginfo(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_device *nffsdev;
+
+ nffsdev = seginfo->fsdev;
+ delete_segment(seginfo);
+ nffsdev->nd_seginfo = NULL;
+ free(seginfo, M_DEVBUF);
+
+ return (0);
+}
+
+static int
+nandfs_create_superroot_block(struct nandfs_seginfo *seginfo,
+ struct buf **newbp)
+{
+ struct buf *bp;
+ int error;
+
+ bp = nandfs_geteblk(seginfo->fsdev->nd_blocksize, GB_NOWAIT_BD);
+
+ bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+ bp->b_flags |= B_MANAGED;
+
+ if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+ error = create_segment(seginfo);
+ if (error) {
+ brelse(bp);
+ nandfs_error("%s: no segment for superroot\n",
+ __func__);
+ return (error);
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry);
+
+ seginfo->curseg->nblocks++;
+ seginfo->curseg->num_blocks--;
+ seginfo->blocks++;
+
+ *newbp = bp;
+ return (0);
+}
+
+static int
+nandfs_add_superroot(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_device *fsdev;
+ struct nandfs_super_root *sr;
+ struct buf *bp = NULL;
+ uint64_t crc_skip;
+ uint32_t crc_calc;
+ int error;
+
+ fsdev = seginfo->fsdev;
+
+ error = nandfs_create_superroot_block(seginfo, &bp);
+ if (error) {
+ nandfs_error("%s: cannot add superroot\n", __func__);
+ return (error);
+ }
+
+ sr = (struct nandfs_super_root *)bp->b_data;
+ /* Save superroot CRC */
+ sr->sr_bytes = NANDFS_SR_BYTES;
+ sr->sr_flags = 0;
+ sr->sr_nongc_ctime = 0;
+
+ memcpy(&sr->sr_dat, &fsdev->nd_dat_node->nn_inode,
+ sizeof(struct nandfs_inode));
+ memcpy(&sr->sr_cpfile, &fsdev->nd_cp_node->nn_inode,
+ sizeof(struct nandfs_inode));
+ memcpy(&sr->sr_sufile, &fsdev->nd_su_node->nn_inode,
+ sizeof(struct nandfs_inode));
+
+ crc_skip = sizeof(sr->sr_sum);
+ crc_calc = crc32((uint8_t *)sr + crc_skip, NANDFS_SR_BYTES - crc_skip);
+
+ sr->sr_sum = crc_calc;
+
+ bp->b_flags |= B_MANAGED;
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+
+ bp->b_flags &= ~B_INVAL;
+ nandfs_dirty_bufs_increment(fsdev);
+ DPRINTF(SYNC, ("%s: bp:%p\n", __func__, bp));
+
+ return (0);
+}
+
+static int
+nandfs_add_segsum_block(struct nandfs_seginfo *seginfo, struct buf **newbp)
+{
+ struct nandfs_device *fsdev;
+ nandfs_daddr_t blk;
+ struct buf *bp;
+ int error;
+
+ if (!(seginfo->curseg) || seginfo->curseg->num_blocks <= 1) {
+ error = create_segment(seginfo);
+ if (error) {
+ nandfs_error("%s: error:%d when creating segment\n",
+ __func__, error);
+ return (error);
+ }
+ *newbp = TAILQ_FIRST(&seginfo->curseg->segsum);
+ return (0);
+ }
+
+ fsdev = seginfo->fsdev;
+ blk = nandfs_block_to_dblock(fsdev, seginfo->curseg->start_block +
+ seginfo->curseg->segsum_blocks);
+
+ bp = getblk(fsdev->nd_devvp, blk, fsdev->nd_blocksize, 0, 0, 0);
+
+ bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+ bp->b_flags |= B_MANAGED;
+
+ TAILQ_INSERT_TAIL(&seginfo->curseg->segsum, bp,
+ b_cluster.cluster_entry);
+ seginfo->curseg->num_blocks--;
+
+ seginfo->curseg->segsum_blocks++;
+ seginfo->curseg->bytes_left = seginfo->fsdev->nd_blocksize;
+ seginfo->curseg->current_off = bp->b_data;
+ seginfo->blocks++;
+
+ *newbp = bp;
+
+ DPRINTF(SYNC, ("%s: bp %p\n", __func__, bp));
+
+ return (0);
+}
+
+static int
+nandfs_add_blocks(struct nandfs_seginfo *seginfo, struct nandfs_node *node,
+ struct buf *bp)
+{
+ union nandfs_binfo *binfo;
+ struct buf *seg_bp;
+ int error;
+
+ if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+ error = create_segment(seginfo);
+ if (error) {
+ nandfs_error("%s: error:%d when creating segment\n",
+ __func__, error);
+ return (error);
+ }
+ }
+
+ if (seginfo->curseg->bytes_left < sizeof(union nandfs_binfo)) {
+ error = nandfs_add_segsum_block(seginfo, &seg_bp);
+ if (error) {
+ nandfs_error("%s: error:%d when adding segsum\n",
+ __func__, error);
+ return (error);
+ }
+ }
+ binfo = (union nandfs_binfo *)seginfo->curseg->current_off;
+
+ if (node->nn_ino != NANDFS_DAT_INO) {
+ binfo->bi_v.bi_blkoff = bp->b_lblkno;
+ binfo->bi_v.bi_ino = node->nn_ino;
+ } else {
+ binfo->bi_dat.bi_blkoff = bp->b_lblkno;
+ binfo->bi_dat.bi_ino = node->nn_ino;
+ if (NANDFS_IS_INDIRECT(bp))
+ binfo->bi_dat.bi_level = 1;
+ else
+ binfo->bi_dat.bi_level = 0;
+ }
+ binfo++;
+
+ seginfo->curseg->bytes_left -= sizeof(union nandfs_binfo);
+ seginfo->curseg->segsum_bytes += sizeof(union nandfs_binfo);
+ seginfo->curseg->current_off = (char *)binfo;
+
+ TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry);
+
+ seginfo->curseg->nbinfos++;
+ seginfo->curseg->nblocks++;
+ seginfo->curseg->num_blocks--;
+ seginfo->blocks++;
+
+ DPRINTF(SYNC, ("%s: bp (%p) number %x (left %x)\n",
+ __func__, bp, seginfo->curseg->nblocks,
+ seginfo->curseg->num_blocks));
+ return (0);
+}
+
+static int
+nandfs_iterate_dirty_buf(struct vnode *vp, struct nandfs_seginfo *seginfo,
+ uint8_t hold)
+{
+ struct buf *bp, *tbd;
+ struct bufobj *bo;
+ struct nandfs_node *node;
+ int error;
+
+ node = VTON(vp);
+ bo = &vp->v_bufobj;
+
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
+ /* Iterate dirty data bufs */
+ TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, tbd) {
+ DPRINTF(SYNC, ("%s: vp (%p): bp (%p) with lblkno %jx ino %jx "
+ "add buf\n", __func__, vp, bp, bp->b_lblkno, node->nn_ino));
+
+ if (!(NANDFS_ISGATHERED(bp))) {
+ error = nandfs_bmap_update_dat(node,
+ nandfs_vblk_get(bp), bp);
+ if (error)
+ return (error);
+ NANDFS_GATHER(bp);
+ nandfs_add_blocks(seginfo, node, bp);
+ }
+ }
+
+ return (0);
+}
+
+static int
+nandfs_iterate_system_vnode(struct nandfs_node *node,
+ struct nandfs_seginfo *seginfo)
+{
+ struct vnode *vp;
+ int nblocks;
+ uint8_t hold = 0;
+
+ if (node->nn_ino != NANDFS_IFILE_INO)
+ hold = 1;
+
+ vp = NTOV(node);
+
+ nblocks = vp->v_bufobj.bo_dirty.bv_cnt;
+ DPRINTF(SYNC, ("%s: vp (%p): nblocks %x ino %jx\n",
+ __func__, vp, nblocks, node->nn_ino));
+
+ if (nblocks)
+ nandfs_iterate_dirty_buf(vp, seginfo, hold);
+
+ return (0);
+}
+
+static int
+nandfs_iterate_dirty_vnodes(struct mount *mp, struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_node *nandfs_node;
+ struct vnode *vp, *mvp;
+ struct thread *td;
+ int error, lockreq, update;
+
+ td = curthread;
+ lockreq = LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY;
+
+ MNT_ILOCK(mp);
+
+ MNT_VNODE_FOREACH(vp, mp, mvp) {
+ update = 0;
+
+ if (mp->mnt_syncer == vp)
+ continue;
+ if (VOP_ISLOCKED(vp))
+ continue;
+
+ VI_LOCK(vp);
+ MNT_IUNLOCK(mp);
+ if (vp->v_iflag & VI_DOOMED) {
+ VI_UNLOCK(vp);
+ MNT_ILOCK(mp);
+ continue;
+ }
+
+ if ((error = vget(vp, lockreq, td)) != 0) {
+ MNT_ILOCK(mp);
+ continue;
+ }
+
+ if (vp->v_iflag & VI_DOOMED) {
+ vput(vp);
+ MNT_ILOCK(mp);
+ continue;
+ }
+
+ nandfs_node = VTON(vp);
+ if (nandfs_node->nn_flags & IN_MODIFIED) {
+ nandfs_node->nn_flags &= ~(IN_MODIFIED);
+ update = 1;
+ }
+
+ if (vp->v_bufobj.bo_dirty.bv_cnt) {
+ error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
+ if (error) {
+ nandfs_error("%s: cannot iterate vnode:%p "
+ "err:%d\n", __func__, vp, error);
+ vput(vp);
+ return (error);
+ }
+ update = 1;
+ } else
+ vput(vp);
+
+ if (update)
+ nandfs_node_update(nandfs_node);
+
+ MNT_ILOCK(mp);
+ }
+
+ MNT_IUNLOCK(mp);
+
+ return (0);
+}
+
+static int
+nandfs_update_phys_block(struct nandfs_device *fsdev, struct buf *bp,
+ uint64_t phys_blknr, union nandfs_binfo *binfo)
+{
+ struct nandfs_node *node, *dat;
+ struct vnode *vp;
+ uint64_t new_blknr;
+ int error;
+
+ vp = bp->b_vp;
+ node = VTON(vp);
+ new_blknr = nandfs_vblk_get(bp);
+ dat = fsdev->nd_dat_node;
+
+ DPRINTF(BMAP, ("%s: ino %#jx lblk %#jx: vblk %#jx -> %#jx\n",
+ __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno,
+ (uintmax_t)new_blknr, (uintmax_t)phys_blknr));
+
+ if (node->nn_ino != NANDFS_DAT_INO) {
+ KASSERT((new_blknr != 0), ("vblk for bp %p is 0", bp));
+
+ nandfs_vblock_assign(fsdev, new_blknr, phys_blknr);
+ binfo->bi_v.bi_vblocknr = new_blknr;
+ binfo->bi_v.bi_blkoff = bp->b_lblkno;
+ binfo->bi_v.bi_ino = node->nn_ino;
+ } else {
+ VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+ error = nandfs_bmap_update_block(node, bp, phys_blknr);
+ if (error) {
+ nandfs_error("%s: error updating block:%jx for bp:%p\n",
+ __func__, (uintmax_t)phys_blknr, bp);
+ VOP_UNLOCK(NTOV(dat), 0);
+ return (error);
+ }
+ VOP_UNLOCK(NTOV(dat), 0);
+ binfo->bi_dat.bi_blkoff = bp->b_lblkno;
+ binfo->bi_dat.bi_ino = node->nn_ino;
+ if (NANDFS_IS_INDIRECT(bp))
+ binfo->bi_dat.bi_level = 1;
+ else
+ binfo->bi_dat.bi_level = 0;
+ }
+
+ return (0);
+}
+
+#define NBINFO(off) ((off) + sizeof(union nandfs_binfo))
+static int
+nandfs_segment_assign_pblk(struct nandfs_segment *nfsseg)
+{
+ struct nandfs_device *fsdev;
+ union nandfs_binfo *binfo;
+ struct buf *bp, *seg_bp;
+ uint64_t blocknr;
+ uint32_t curr_off, blocksize;
+ int error;
+
+ fsdev = nfsseg->fsdev;
+ blocksize = fsdev->nd_blocksize;
+
+ blocknr = nfsseg->start_block + nfsseg->segsum_blocks;
+ seg_bp = TAILQ_FIRST(&nfsseg->segsum);
+ DPRINTF(SYNC, ("%s: seg:%p segsum bp:%p data:%p\n",
+ __func__, nfsseg, seg_bp, seg_bp->b_data));
+
+ binfo = (union nandfs_binfo *)(seg_bp->b_data +
+ sizeof(struct nandfs_segment_summary));
+ curr_off = sizeof(struct nandfs_segment_summary);
+
+ TAILQ_FOREACH(bp, &nfsseg->data, b_cluster.cluster_entry) {
+ KASSERT((bp->b_vp), ("bp %p has not vp", bp));
+
+ DPRINTF(BMAP, ("\n\n%s: assign buf %p for ino %#jx next %p\n",
+ __func__, bp, (uintmax_t)VTON(bp->b_vp)->nn_ino,
+ TAILQ_NEXT(bp, b_cluster.cluster_entry)));
+
+ if (NBINFO(curr_off) > blocksize) {
+ seg_bp = TAILQ_NEXT(seg_bp, b_cluster.cluster_entry);
+ binfo = (union nandfs_binfo *)seg_bp->b_data;
+ curr_off = 0;
+ DPRINTF(SYNC, ("%s: next segsum %p data %p\n",
+ __func__, seg_bp, seg_bp->b_data));
+ }
+
+ error = nandfs_update_phys_block(fsdev, bp, blocknr, binfo);
+ if (error) {
+ nandfs_error("%s: err:%d when updating phys block:%jx"
+ " for bp:%p and binfo:%p\n", __func__, error,
+ (uintmax_t)blocknr, bp, binfo);
+ return (error);
+ }
+ binfo++;
+ curr_off = NBINFO(curr_off);
+
+ blocknr++;
+ }
+
+ return (0);
+}
+
+static int
+nandfs_seginfo_assign_pblk(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_segment *nfsseg;
+ int error = 0;
+
+ LIST_FOREACH(nfsseg, &seginfo->seg_list, seg_link) {
+ error = nandfs_segment_assign_pblk(nfsseg);
+ if (error)
+ break;
+ }
+
+ return (error);
+}
+
+static struct nandfs_segment_summary *
+nandfs_fill_segsum(struct nandfs_segment *seg, int has_sr)
+{
+ struct nandfs_segment_summary *ss;
+ struct nandfs_device *fsdev;
+ struct buf *bp;
+ uint32_t rest, segsum_size, blocksize, crc_calc;
+ uint16_t flags;
+ uint8_t *crc_area, crc_skip;
+
+ DPRINTF(SYNC, ("%s: seg %#jx nblocks %#x sumbytes %#x\n",
+ __func__, (uintmax_t) seg->seg_num,
+ seg->nblocks + seg->segsum_blocks,
+ seg->segsum_bytes));
+
+ fsdev = seg->fsdev;
+
+ flags = NANDFS_SS_LOGBGN | NANDFS_SS_LOGEND;
+ if (has_sr)
+ flags |= NANDFS_SS_SR;
+
+ bp = TAILQ_FIRST(&seg->segsum);
+ ss = (struct nandfs_segment_summary *) bp->b_data;
+ ss->ss_magic = NANDFS_SEGSUM_MAGIC;
+ ss->ss_bytes = sizeof(struct nandfs_segment_summary);
+ ss->ss_flags = flags;
+ ss->ss_seq = ++(fsdev->nd_seg_sequence);
+ ss->ss_create = fsdev->nd_ts.tv_sec;
+ nandfs_get_segment_range(fsdev, seg->seg_next, &ss->ss_next, NULL);
+ ss->ss_nblocks = seg->nblocks + seg->segsum_blocks;
+ ss->ss_nbinfos = seg->nbinfos;
+ ss->ss_sumbytes = seg->segsum_bytes;
+
+ crc_skip = sizeof(ss->ss_datasum) + sizeof(ss->ss_sumsum);
+ blocksize = seg->fsdev->nd_blocksize;
+
+ segsum_size = seg->segsum_bytes - crc_skip;
+ rest = min(seg->segsum_bytes, blocksize) - crc_skip;
+ crc_area = (uint8_t *)ss + crc_skip;
+ crc_calc = ~0U;
+ while (segsum_size > 0) {
+ crc_calc = crc32_raw(crc_area, rest, crc_calc);
+ segsum_size -= rest;
+ if (!segsum_size)
+ break;
+ bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+ crc_area = (uint8_t *)bp->b_data;
+ rest = segsum_size <= blocksize ? segsum_size : blocksize;
+ }
+ ss->ss_sumsum = crc_calc ^ ~0U;
+
+ return (ss);
+
+}
+
+static int
+nandfs_save_buf(struct buf *bp, uint64_t blocknr, struct nandfs_device *fsdev)
+{
+ struct bufobj *bo;
+ int error;
+
+ bo = &fsdev->nd_devvp->v_bufobj;
+
+ bp->b_blkno = nandfs_block_to_dblock(fsdev, blocknr);
+ bp->b_iooffset = dbtob(bp->b_blkno);
+
+ KASSERT(bp->b_bufobj != NULL, ("no bufobj for %p", bp));
+ if (bp->b_bufobj != bo) {
+ BO_LOCK(bp->b_bufobj);
+ BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
+ BO_MTX(bp->b_bufobj));
+ KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer"));
+ }
+
+ DPRINTF(SYNC, ("%s: buf: %p offset %#jx blk %#jx size %#x\n",
+ __func__, bp, (uintmax_t)bp->b_offset, (uintmax_t)blocknr,
+ fsdev->nd_blocksize));
+
+ NANDFS_UNGATHER(bp);
+ nandfs_buf_clear(bp, 0xffffffff);
+ bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
+ error = bwrite(bp);
+ if (error) {
+ nandfs_error("%s: error:%d when writing buffer:%p\n",
+ __func__, error, bp);
+ return (error);
+ }
+ return (error);
+}
+
+static void
+nandfs_clean_buf(struct nandfs_device *fsdev, struct buf *bp)
+{
+
+ DPRINTF(SYNC, ("%s: buf: %p\n", __func__, bp));
+
+ NANDFS_UNGATHER(bp);
+ nandfs_buf_clear(bp, 0xffffffff);
+ bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
+ nandfs_undirty_buf_fsdev(fsdev, bp);
+}
+
+static void
+nandfs_clean_segblocks(struct nandfs_segment *seg, uint8_t unlock)
+{
+ struct nandfs_device *fsdev = seg->fsdev;
+ struct nandfs_segment *next_seg;
+ struct buf *bp, *tbp, *next_bp;
+ struct vnode *vp, *next_vp;
+
+ VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
+ TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
+ nandfs_clean_buf(fsdev, bp);
+ };
+
+ TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);
+
+ /*
+ * If bp is not super-root and vnode is not currently
+ * locked lock it.
+ */
+ vp = bp->b_vp;
+ next_vp = NULL;
+ next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+ if (!next_bp) {
+ next_seg = LIST_NEXT(seg, seg_link);
+ if (next_seg)
+ next_bp = TAILQ_FIRST(&next_seg->data);
+ }
+
+ if (next_bp)
+ next_vp = next_bp->b_vp;
+
+ nandfs_clean_buf(fsdev, bp);
+
+ if (unlock && vp != NULL && next_vp != vp &&
+ !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+ vput(vp);
+
+ nandfs_dirty_bufs_decrement(fsdev);
+ }
+
+ VOP_UNLOCK(fsdev->nd_devvp, 0);
+}
+
+static int
+nandfs_save_segblocks(struct nandfs_segment *seg, uint8_t unlock)
+{
+ struct nandfs_device *fsdev = seg->fsdev;
+ struct nandfs_segment *next_seg;
+ struct buf *bp, *tbp, *next_bp;
+ struct vnode *vp, *next_vp;
+ uint64_t blocknr;
+ uint32_t i = 0;
+ int error = 0;
+
+ VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
+ TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
+ blocknr = seg->start_block + i;
+ error = nandfs_save_buf(bp, blocknr, fsdev);
+ if (error) {
+ nandfs_error("%s: error saving buf: %p blocknr:%jx\n",
+ __func__, bp, (uintmax_t)blocknr);
+ goto out;
+ }
+ i++;
+ };
+
+ i = 0;
+ TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);
+
+ blocknr = seg->start_block + seg->segsum_blocks + i;
+ /*
+ * If bp is not super-root and vnode is not currently
+ * locked lock it.
+ */
+ vp = bp->b_vp;
+ next_vp = NULL;
+ next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+ if (!next_bp) {
+ next_seg = LIST_NEXT(seg, seg_link);
+ if (next_seg)
+ next_bp = TAILQ_FIRST(&next_seg->data);
+ }
+
+ if (next_bp)
+ next_vp = next_bp->b_vp;
+
+ error = nandfs_save_buf(bp, blocknr, fsdev);
+ if (error) {
+ nandfs_error("%s: error saving buf: %p blknr: %jx\n",
+ __func__, bp, (uintmax_t)blocknr);
+ if (unlock && vp != NULL && next_vp != vp &&
+ !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+ vput(vp);
+ goto out;
+ }
+
+ if (unlock && vp != NULL && next_vp != vp &&
+ !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+ vput(vp);
+
+ i++;
+ nandfs_dirty_bufs_decrement(fsdev);
+ }
+out:
+ if (error) {
+ nandfs_clean_segblocks(seg, unlock);
+ VOP_UNLOCK(fsdev->nd_devvp, 0);
+ return (error);
+ }
+
+ VOP_UNLOCK(fsdev->nd_devvp, 0);
+ return (error);
+}
+
+
+static void
+clean_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
+{
+ struct nandfs_segment *seg;
+
+ DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));
+
+ LIST_FOREACH(seg, &seginfo->seg_list, seg_link) {
+ nandfs_clean_segblocks(seg, unlock);
+ }
+}
+
+static int
+save_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
+{
+ struct nandfs_segment *seg;
+ struct nandfs_device *fsdev;
+ struct nandfs_segment_summary *ss;
+ int error = 0;
+
+ fsdev = seginfo->fsdev;
+
+ DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));
+
+ LIST_FOREACH(seg, &seginfo->seg_list, seg_link) {
+ if (LIST_NEXT(seg, seg_link)) {
+ nandfs_fill_segsum(seg, 0);
+ error = nandfs_save_segblocks(seg, unlock);
+ if (error) {
+ nandfs_error("%s: error:%d saving seg:%p\n",
+ __func__, error, seg);
+ goto out;
+ }
+ } else {
+ ss = nandfs_fill_segsum(seg, 1);
+ fsdev->nd_last_segsum = *ss;
+ error = nandfs_save_segblocks(seg, unlock);
+ if (error) {
+ nandfs_error("%s: error:%d saving seg:%p\n",
+ __func__, error, seg);
+ goto out;
+ }
+ fsdev->nd_last_cno++;
+ fsdev->nd_last_pseg = seg->start_block;
+ }
+ }
+out:
+ if (error)
+ clean_seginfo(seginfo, unlock);
+ return (error);
+}
+
+static void
+nandfs_invalidate_bufs(struct nandfs_device *fsdev, uint64_t segno)
+{
+ uint64_t start, end;
+ struct buf *bp, *tbd;
+ struct bufobj *bo;
+
+ nandfs_get_segment_range(fsdev, segno, &start, &end);
+
+ bo = &NTOV(fsdev->nd_gc_node)->v_bufobj;
+
+ BO_LOCK(bo);
+restart_locked_gc:
+ TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, tbd) {
+ if (!(bp->b_lblkno >= start && bp->b_lblkno <= end))
+ continue;
+
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ goto restart_locked_gc;
+
+ bremfree(bp);
+ bp->b_flags |= (B_INVAL | B_RELBUF);
+ bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+ BO_UNLOCK(bo);
+ brelse(bp);
+ BO_LOCK(bo);
+ }
+ BO_UNLOCK(bo);
+}
+
+/* Process segments marks to free by cleaner */
+static void
+nandfs_process_segments(struct nandfs_device *fsdev)
+{
+ uint64_t saved_segment;
+ int i;
+
+ if (fsdev->nd_free_base) {
+ saved_segment = nandfs_get_segnum_of_block(fsdev,
+ fsdev->nd_super.s_last_pseg);
+ for (i = 0; i < fsdev->nd_free_count; i++) {
+ if (fsdev->nd_free_base[i] == NANDFS_NOSEGMENT)
+ continue;
+ /* Update superblock if clearing segment point by it */
+ if (fsdev->nd_free_base[i] == saved_segment) {
+ nandfs_write_superblock(fsdev);
+ saved_segment = nandfs_get_segnum_of_block(
+ fsdev, fsdev->nd_super.s_last_pseg);
+ }
+ nandfs_invalidate_bufs(fsdev, fsdev->nd_free_base[i]);
+ nandfs_clear_segment(fsdev, fsdev->nd_free_base[i]);
+ }
+
+ free(fsdev->nd_free_base, M_NANDFSTEMP);
+ fsdev->nd_free_base = NULL;
+ fsdev->nd_free_count = 0;
+ }
+}
+
+/* Collect and write dirty buffers */
+int
+nandfs_sync_file(struct vnode *vp)
+{
+ struct nandfs_device *fsdev;
+ struct nandfs_node *nandfs_node;
+ struct nandfsmount *nmp;
+ struct nandfs_node *dat, *su, *ifile, *cp;
+ struct nandfs_seginfo *seginfo = NULL;
+ struct nandfs_segment *seg;
+ int update, error;
+ int cno_changed;
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ DPRINTF(SYNC, ("%s: START\n", __func__));
+
+ error = 0;
+ nmp = VFSTONANDFS(vp->v_mount);
+ fsdev = nmp->nm_nandfsdev;
+
+ dat = fsdev->nd_dat_node;
+ su = fsdev->nd_su_node;
+ cp = fsdev->nd_cp_node;
+ ifile = nmp->nm_ifile_node;
+
+ NANDFS_WRITEASSERT(fsdev);
+ if (lockmgr(&fsdev->nd_seg_const, LK_UPGRADE, NULL) != 0) {
+ DPRINTF(SYNC, ("%s: lost shared lock\n", __func__));
+ if (lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL) != 0)
+ panic("couldn't lock exclusive");
+ }
+ DPRINTF(SYNC, ("%s: got lock\n", __func__));
+
+ VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
+ create_seginfo(fsdev, &seginfo);
+
+ update = 0;
+
+ nandfs_node = VTON(vp);
+ if (nandfs_node->nn_flags & IN_MODIFIED) {
+ nandfs_node->nn_flags &= ~(IN_MODIFIED);
+ update = 1;
+ }
+
+ if (vp->v_bufobj.bo_dirty.bv_cnt) {
+ error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+ nandfs_error("%s: err:%d iterating dirty bufs vp:%p",
+ __func__, error, vp);
+ return (error);
+ }
+ update = 1;
+ }
+
+ if (update) {
+ VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE);
+ error = nandfs_node_update(nandfs_node);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ VOP_UNLOCK(NTOV(ifile), 0);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+ nandfs_error("%s: err:%d updating vp:%p",
+ __func__, error, vp);
+ return (error);
+ }
+ VOP_UNLOCK(NTOV(ifile), 0);
+ }
+
+ cno_changed = 0;
+ if (seginfo->blocks) {
+ VOP_LOCK(NTOV(cp), LK_EXCLUSIVE);
+ cno_changed = 1;
+ /* Create new checkpoint */
+ error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ VOP_UNLOCK(NTOV(cp), 0);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+ nandfs_error("%s: err:%d getting cp:%jx",
+ __func__, error, fsdev->nd_last_cno + 1);
+ return (error);
+ }
+
+ /* Reiterate all blocks and assign physical block number */
+ nandfs_seginfo_assign_pblk(seginfo);
+
+ /* Fill checkpoint data */
+ error = nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1,
+ &ifile->nn_inode, seginfo->blocks);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ VOP_UNLOCK(NTOV(cp), 0);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+ nandfs_error("%s: err:%d setting cp:%jx",
+ __func__, error, fsdev->nd_last_cno + 1);
+ return (error);
+ }
+
+ VOP_UNLOCK(NTOV(cp), 0);
+ LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
+ nandfs_update_segment(fsdev, seg->seg_num,
+ seg->nblocks + seg->segsum_blocks);
+
+ VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+ error = save_seginfo(seginfo, 0);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ VOP_UNLOCK(NTOV(dat), 0);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+ nandfs_error("%s: err:%d updating seg",
+ __func__, error);
+ return (error);
+ }
+ VOP_UNLOCK(NTOV(dat), 0);
+ }
+
+ VOP_UNLOCK(NTOV(su), 0);
+
+ delete_seginfo(seginfo);
+ lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+
+ if (cno_changed && !error) {
+ if (nandfs_cps_between_sblocks != 0 &&
+ fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0)
+ nandfs_write_superblock(fsdev);
+ }
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ DPRINTF(SYNC, ("%s: END error %d\n", __func__, error));
+ return (error);
+}
+
+/*
+ * Build and write a new log (partial segment): gather every dirty block
+ * on the mount, create a new checkpoint, assign physical block numbers
+ * and pass the result to the segment write path.  Serialized against
+ * the cleaner by the per-device nd_seg_const lock.
+ *
+ * "flags" forces checkpoint creation even with no dirty data; with
+ * NANDFS_UMOUNT set the whole pass is re-run until the system vnodes
+ * have no dirty buffers left.  Returns 0 or an errno.
+ */
+int
+nandfs_segment_constructor(struct nandfsmount *nmp, int flags)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_seginfo *seginfo = NULL;
+	struct nandfs_segment *seg;
+	struct nandfs_node *dat, *su, *ifile, *cp, *gc;
+	int cno_changed, error;
+
+	DPRINTF(SYNC, ("%s: START\n", __func__));
+	fsdev = nmp->nm_nandfsdev;
+
+	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
+	/* NOTE(review): "git" in the debug string looks like a typo for "got". */
+	DPRINTF(SYNC, ("%s: git lock\n", __func__));
+again:
+	create_seginfo(fsdev, &seginfo);
+
+	dat = fsdev->nd_dat_node;
+	su = fsdev->nd_su_node;
+	cp = fsdev->nd_cp_node;
+	gc = fsdev->nd_gc_node;
+	ifile = nmp->nm_ifile_node;
+
+	/* Lock order: su -> ifile -> gc -> cp (dat is taken/dropped inside). */
+	VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
+	VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE);
+	VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
+	VOP_LOCK(NTOV(cp), LK_EXCLUSIVE);
+
+	nandfs_iterate_system_vnode(gc, seginfo);
+	nandfs_iterate_dirty_vnodes(nmp->nm_vfs_mountp, seginfo);
+	nandfs_iterate_system_vnode(ifile, seginfo);
+	nandfs_iterate_system_vnode(su, seginfo);
+
+	cno_changed = 0;
+	if (seginfo->blocks || flags) {
+		cno_changed = 1;
+		/* Create new checkpoint */
+		error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1);
+		if (error) {
+			clean_seginfo(seginfo, 0);
+			delete_seginfo(seginfo);
+			goto error_locks;
+		}
+
+		/* Collect blocks from system files */
+		nandfs_iterate_system_vnode(cp, seginfo);
+		nandfs_iterate_system_vnode(su, seginfo);
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+		nandfs_iterate_system_vnode(dat, seginfo);
+		VOP_UNLOCK(NTOV(dat), 0);
+reiterate:
+		/*
+		 * Collecting su/dat blocks may itself dirty more metadata;
+		 * repeat until the pass converges (reiterate stays 0).
+		 */
+		seginfo->reiterate = 0;
+		nandfs_iterate_system_vnode(su, seginfo);
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+		nandfs_iterate_system_vnode(dat, seginfo);
+		VOP_UNLOCK(NTOV(dat), 0);
+		if (seginfo->reiterate)
+			goto reiterate;
+		if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+			error = create_segment(seginfo);
+			if (error) {
+				clean_seginfo(seginfo, 0);
+				delete_seginfo(seginfo);
+				goto error_locks;
+			}
+			goto reiterate;
+		}
+
+		/* Reiterate all blocks and assign physical block number */
+		nandfs_seginfo_assign_pblk(seginfo);
+
+		/* Fill superroot */
+		error = nandfs_add_superroot(seginfo);
+		if (error) {
+			clean_seginfo(seginfo, 0);
+			delete_seginfo(seginfo);
+			goto error_locks;
+		}
+		KASSERT(!(seginfo->reiterate), ("reiteration after superroot"));
+
+		/* Fill checkpoint data */
+		nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1,
+		    &ifile->nn_inode, seginfo->blocks);
+
+		LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
+			nandfs_update_segment(fsdev, seg->seg_num,
+			    seg->nblocks + seg->segsum_blocks);
+
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+		error = save_seginfo(seginfo, 1);
+		if (error) {
+			clean_seginfo(seginfo, 1);
+			delete_seginfo(seginfo);
+			goto error_dat;
+		}
+		VOP_UNLOCK(NTOV(dat), 0);
+	}
+
+	VOP_UNLOCK(NTOV(cp), 0);
+	VOP_UNLOCK(NTOV(gc), 0);
+	VOP_UNLOCK(NTOV(ifile), 0);
+
+	nandfs_process_segments(fsdev);
+
+	VOP_UNLOCK(NTOV(su), 0);
+
+	delete_seginfo(seginfo);
+
+	/*
+	 * XXX: a hack, will go away soon
+	 */
+	if ((NTOV(dat)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+	    NTOV(cp)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+	    NTOV(gc)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+	    NTOV(ifile)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+	    NTOV(su)->v_bufobj.bo_dirty.bv_cnt != 0) &&
+	    (flags & NANDFS_UMOUNT)) {
+		DPRINTF(SYNC, ("%s: RERUN\n", __func__));
+		goto again;
+	}
+
+	MPASS(fsdev->nd_free_base == NULL);
+
+	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+	/* Periodically (and always on unmount) persist the superblock. */
+	if (cno_changed) {
+		if ((nandfs_cps_between_sblocks != 0 &&
+		    fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) ||
+		    flags & NANDFS_UMOUNT)
+			nandfs_write_superblock(fsdev);
+	}
+
+	DPRINTF(SYNC, ("%s: END\n", __func__));
+	return (0);
+error_dat:
+	VOP_UNLOCK(NTOV(dat), 0);
+error_locks:
+	VOP_UNLOCK(NTOV(cp), 0);
+	VOP_UNLOCK(NTOV(gc), 0);
+	VOP_UNLOCK(NTOV(ifile), 0);
+	VOP_UNLOCK(NTOV(su), 0);
+	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+	return (error);
+}
+
+#ifdef DDB
+/*
+ * Show details about the given NANDFS mount point.
+ *
+ * DDB command: "show nandfs <mount_addr>".  Dumps device/superblock
+ * summary, the system vnodes and, if a segment construction is in
+ * flight, every gathered segment with its segsum and data buffers.
+ * Runs in the debugger context: no locking is (or can be) taken.
+ */
+DB_SHOW_COMMAND(nandfs, db_show_nandfs)
+{
+	struct mount *mp;
+	struct nandfs_device *nffsdev;
+	struct nandfs_segment *seg;
+	struct nandfsmount *nmp;
+	struct buf *bp;
+	struct vnode *vp;
+
+	if (!have_addr) {
+		db_printf("\nUsage: show nandfs <mount_addr>\n");
+		return;
+	}
+
+	/* The argument is trusted to be a struct mount pointer. */
+	mp = (struct mount *)addr;
+	db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname,
+	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename);
+
+
+	nmp = (struct nandfsmount *)(mp->mnt_data);
+	nffsdev = nmp->nm_nandfsdev;
+	db_printf("dev vnode:%p\n", nffsdev->nd_devvp);
+	db_printf("blocksize:%jx last cno:%jx last pseg:%jx seg num:%jx\n",
+	    (uintmax_t)nffsdev->nd_blocksize, (uintmax_t)nffsdev->nd_last_cno,
+	    (uintmax_t)nffsdev->nd_last_pseg, (uintmax_t)nffsdev->nd_seg_num);
+	db_printf("system nodes: dat:%p cp:%p su:%p ifile:%p gc:%p\n",
+	    nffsdev->nd_dat_node, nffsdev->nd_cp_node, nffsdev->nd_su_node,
+	    nmp->nm_ifile_node, nffsdev->nd_gc_node);
+
+	if (nffsdev->nd_seginfo != NULL) {
+		LIST_FOREACH(seg, &nffsdev->nd_seginfo->seg_list, seg_link) {
+			db_printf("seg: %p\n", seg);
+			TAILQ_FOREACH(bp, &seg->segsum,
+			    b_cluster.cluster_entry)
+				db_printf("segbp %p\n", bp);
+			TAILQ_FOREACH(bp, &seg->data,
+			    b_cluster.cluster_entry) {
+				vp = bp->b_vp;
+				db_printf("bp:%p bp->b_vp:%p ino:%jx\n", bp, vp,
+				    (uintmax_t)(vp ? VTON(vp)->nn_ino : 0));
+			}
+		}
+	}
+}
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_subr.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_subr.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,1120 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_subr.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/libkern.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <machine/_inttypes.h>
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+/* Malloc types for mount structures and short-lived scratch memory. */
+MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");
+MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp");
+
+uma_zone_t nandfs_node_zone;
+
+void nandfs_bdflush(struct bufobj *bo, struct buf *bp);
+int nandfs_bufsync(struct bufobj *bo, int waitfor);
+
+struct buf_ops buf_ops_nandfs = {
+ .bop_name = "buf_ops_nandfs",
+ .bop_write = bufwrite,
+ .bop_strategy = bufstrategy,
+ .bop_sync = nandfs_bufsync,
+ .bop_bdflush = nandfs_bdflush,
+};
+
+/*
+ * bop_sync hook: flush the dirty buffers of the vnode behind "bo" by
+ * syncing the whole file through the segment constructor.  "waitfor"
+ * is not consulted.  Returns the nandfs_sync_file() error, if any.
+ */
+int
+nandfs_bufsync(struct bufobj *bo, int waitfor)
+{
+	struct vnode *vp;
+	int error = 0;
+
+	vp = bo->__bo_vnode;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	error = nandfs_sync_file(vp);
+	if (error)
+		nandfs_warning("%s: cannot flush buffers err:%d\n",
+		    __func__, error);
+
+	return (error);
+}
+
+/*
+ * bop_bdflush hook: when a vnode accumulates too many dirty buffers
+ * (80% of dirtybufthresh), sync the file to push them to the media.
+ * System nodes and indirect-block buffers are left to the segment
+ * constructor.  Converted from the old K&R definition to an ANSI
+ * prototype to match the forward declaration above.
+ */
+void
+nandfs_bdflush(struct bufobj *bo, struct buf *bp)
+{
+	struct vnode *vp;
+	int error;
+
+	/* Below the threshold there is nothing to do yet. */
+	if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10))
+		return;
+
+	vp = bp->b_vp;
+	if (NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+		return;
+
+	if (NANDFS_IS_INDIRECT(bp))
+		return;
+
+	error = nandfs_sync_file(vp);
+	if (error)
+		nandfs_warning("%s: cannot flush buffers err:%d\n",
+		    __func__, error);
+}
+
+/*
+ * VFS init hook: create the UMA zone from which in-core nandfs_node
+ * structures are allocated.  Always returns 0.
+ */
+int
+nandfs_init(struct vfsconf *vfsp)
+{
+
+	nandfs_node_zone = uma_zcreate("nandfs node zone",
+	    sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0);
+
+	return (0);
+}
+
+/*
+ * VFS uninit hook: tear down the node zone created by nandfs_init().
+ * Always returns 0.
+ */
+int
+nandfs_uninit(struct vfsconf *vfsp)
+{
+
+	uma_zdestroy(nandfs_node_zone);
+	return (0);
+}
+
+/* Basic calculators */
+/* Basic calculators */
+/*
+ * Map a device block number to its segment number.  Segment 0 starts
+ * at f_first_data_block, so that offset is divided out as well.
+ */
+uint64_t
+nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev,
+    nandfs_daddr_t blocknr)
+{
+	uint64_t segnum, blks_per_seg;
+
+	MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block);
+
+	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
+
+	segnum = blocknr / blks_per_seg;
+	segnum -= nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg;
+
+	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
+	    blocknr, segnum));
+
+	return (segnum);
+}
+
+/*
+ * Compute the first and (inclusive) last block numbers of segment
+ * "segnum".  seg_end may be NULL if only the start is needed.
+ */
+void
+nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
+    uint64_t *seg_start, uint64_t *seg_end)
+{
+	uint64_t blks_per_seg;
+
+	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
+	*seg_start = nandfsdev->nd_fsdata.f_first_data_block +
+	    blks_per_seg * segnum;
+	if (seg_end != NULL)
+		*seg_end = *seg_start + blks_per_seg -1;
+}
+
+/*
+ * Derive the layout constants of a metadata file (MDT) for the given
+ * on-media entry size: entries per bitmap group (one bit each, hence
+ * blocksize * 8), entries per block, and the block counts per group
+ * and per descriptor block.  The "+ 1"s account for the bitmap block
+ * in each group and the descriptor block itself — presumably; layout
+ * details live in the on-media format spec, TODO confirm.
+ */
+void nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
+    struct nandfs_mdt *mdt, int entry_size)
+{
+	uint32_t blocksize = nandfsdev->nd_blocksize;
+
+	mdt->entries_per_group = blocksize * 8;
+	mdt->entries_per_block = blocksize / entry_size;
+
+	mdt->blocks_per_group =
+	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
+	mdt->groups_per_desc_block =
+	    blocksize / sizeof(struct nandfs_block_group_desc);
+	mdt->blocks_per_desc_block =
+	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
+}
+
+/*
+ * Read one filesystem block straight from the underlying device vnode.
+ * "blocknr" is in filesystem blocks and is scaled to DEV_BSIZE units.
+ * Note: the "cred" and "flags" parameters are currently ignored — the
+ * read is issued with NOCRED.  On success *bpp holds the buffer; the
+ * caller must brelse() it.
+ */
+int
+nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+	int error;
+
+	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
+	    blocknr * blk2dev, nandfsdev->nd_devvp));
+	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
+	    nandfsdev->nd_blocksize, NOCRED, bpp);
+	if (error)
+		nandfs_error("%s: cannot read from device - blk:%jx\n",
+		    __func__, blocknr);
+	return (error);
+}
+
+/* Read on a node */
+/*
+ * Read logical block "blocknr" of a nandfs node through the buffer
+ * cache.  For cached buffers of non-DAT nodes the virtual block number
+ * is looked up via the bmap and stashed in the buffer (b_fsprivate1)
+ * if not already set.  NOTE(review): the KASSERT fires on any read
+ * error under INVARIANTS, before the error is returned.
+ */
+int
+nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	nandfs_daddr_t vblk;
+	int error;
+
+	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+	    blocknr));
+
+	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+	    cred, bpp);
+
+	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
+	    NTOV(node), blocknr, error));
+
+	if (!nandfs_vblk_get(*bpp) &&
+	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
+		nandfs_bmap_lookup(node, blocknr, &vblk);
+		nandfs_vblk_set(*bpp, vblk);
+	}
+	return (error);
+}
+
+/*
+ * Metadata variant of nandfs_bread().  NOTE(review): currently an
+ * exact duplicate of nandfs_bread(); kept separate presumably so the
+ * metadata path can diverge later — candidate for consolidation.
+ */
+int
+nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	nandfs_daddr_t vblk;
+	int error;
+
+	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+	    blocknr));
+
+	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+	    cred, bpp);
+
+	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
+	    NTOV(node), blocknr, error));
+
+	if (!nandfs_vblk_get(*bpp) &&
+	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
+		nandfs_bmap_lookup(node, blocknr, &vblk);
+		nandfs_vblk_set(*bpp, vblk);
+	}
+
+	return (error);
+}
+
+/*
+ * Release a node's virtual block: end its lifetime in the DAT and
+ * drop the inode's block count.  Returns 0 or the nandfs_vblock_end()
+ * error (in which case i_blocks is left untouched).
+ */
+int
+nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
+{
+	int error;
+
+	if (!NANDFS_SYS_NODE(node->nn_ino))
+		NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+	error = nandfs_vblock_end(node->nn_nandfsdev, vblk);
+	if (error) {
+		nandfs_error("%s: ending vblk: %jx failed\n",
+		    __func__, (uintmax_t)vblk);
+		return (error);
+	}
+	node->nn_inode.i_blocks--;
+
+	return (0);
+}
+
+/*
+ * Create (allocate and zero) logical block "blocknr" of a regular
+ * node: get a fresh buffer, insert it into the node's bmap and bump
+ * i_blocks.  Returns 0 on success, the bmap error, or -1 if getblk()
+ * returned NULL (which the KASSERT rules out under INVARIANTS).
+ */
+int
+nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	int error;
+
+	ASSERT_VOP_LOCKED(NTOV(node), __func__);
+	if (!NANDFS_SYS_NODE(node->nn_ino))
+		NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+	    blocknr));
+
+	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+	    0, 0, 0);
+
+	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
+	    NTOV(node), blocknr));
+
+	if (*bpp) {
+		vfs_bio_clrbuf(*bpp);
+		(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
+		error = nandfs_bmap_insert_block(node, blocknr, *bpp);
+		if (error) {
+			nandfs_warning("%s: failed bmap insert node:%p"
+			    " blk:%jx\n", __func__, node, blocknr);
+			brelse(*bpp);
+			return (error);
+		}
+		node->nn_inode.i_blocks++;
+
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Create a metadata block: allocate a buffer, assign it a virtual
+ * block number (a real DAT allocation, or a fake one for the DAT file
+ * itself) and insert it into the node's bmap.  Unlike nandfs_bcreate()
+ * this does not bump i_blocks.  NOTE(review): the return value of
+ * nandfs_bmap_insert_block() is ignored here — verify intentional.
+ */
+int
+nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	struct nandfs_device *fsdev;
+	nandfs_daddr_t vblk;
+	int error;
+
+	ASSERT_VOP_LOCKED(NTOV(node), __func__);
+	NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+	    blocknr));
+
+	fsdev = node->nn_nandfsdev;
+
+	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+	    0, 0, 0);
+
+	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
+	    NTOV(node), blocknr));
+
+	memset((*bpp)->b_data, 0, fsdev->nd_blocksize);
+
+	vfs_bio_clrbuf(*bpp);
+	(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
+
+	nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);
+
+	if (node->nn_ino != NANDFS_DAT_INO) {
+		error = nandfs_vblock_alloc(fsdev, &vblk);
+		if (error) {
+			nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
+			brelse(*bpp);
+			return (error);
+		}
+	} else
+		vblk = fsdev->nd_fakevblk++;
+
+	nandfs_vblk_set(*bpp, vblk);
+
+	nandfs_bmap_insert_block(node, blocknr, *bpp);
+	return (0);
+}
+
+/* Translate index to a file block number and an entry */
+/*
+ * Given an entry index in an MDT file, compute the logical file block
+ * holding the entry (*blocknr) and the entry's slot within that block
+ * (*entry_in_block), using the layout constants from
+ * nandfs_calc_mdt_consts().
+ */
+void
+nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
+    nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
+{
+	uint64_t blknr;
+	uint64_t group, group_offset, blocknr_in_group;
+	uint64_t desc_block, desc_offset;
+
+	/* Calculate our offset in the file */
+	group = index / mdt->entries_per_group;
+	group_offset = index % mdt->entries_per_group;
+	desc_block = group / mdt->groups_per_desc_block;
+	desc_offset = group % mdt->groups_per_desc_block;
+	blocknr_in_group = group_offset / mdt->entries_per_block;
+
+	/* To descgroup offset */
+	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
+
+	/* To group offset */
+	blknr += desc_offset * mdt->blocks_per_group;
+
+	/* To actual file block */
+	blknr += 1 + blocknr_in_group;
+
+	*blocknr = blknr;
+	*entry_in_block = group_offset % mdt->entries_per_block;
+}
+
+/*
+ * Like nandfs_mdt_trans(), but additionally return the logical blocks
+ * of the descriptor (*desc) and bitmap (*bitmap) covering the entry —
+ * the full trio needed for allocation bookkeeping.
+ */
+void
+nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index,
+    uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr,
+    uint32_t *entry_in_block)
+{
+	uint64_t blknr;
+	uint64_t group, group_offset, blocknr_in_group;
+	uint64_t desc_block, desc_offset;
+
+	/* Calculate our offset in the file */
+	group = index / mdt->entries_per_group;
+	group_offset = index % mdt->entries_per_group;
+	desc_block = group / mdt->groups_per_desc_block;
+	desc_offset = group % mdt->groups_per_desc_block;
+	blocknr_in_group = group_offset / mdt->entries_per_block;
+
+	/* To descgroup offset */
+	*desc = desc_block * mdt->blocks_per_desc_block;
+	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
+
+	/* To group offset */
+	blknr += desc_offset * mdt->blocks_per_group;
+	*bitmap = blknr;
+
+	/* To actual file block */
+	blknr += 1 + blocknr_in_group;
+
+	*blocknr = blknr;
+	*entry_in_block = group_offset % mdt->entries_per_block;
+
+	DPRINTF(ALLOC,
+	    ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n",
+	    __func__, (uintmax_t)*desc, (uintmax_t)*bitmap,
+	    (uintmax_t)*blocknr, *entry_in_block));
+}
+
+/*
+ * Translate a virtual block number to a physical one via the DAT file.
+ * DAT and GC blocks are already physical and pass through unchanged,
+ * as does vblocknr 0 (*pblocknr is then left untouched).
+ *
+ * Fixes: the first TRANSLATE DPRINTF lacked a terminating semicolon,
+ * and the bread() error path dropped the DAT vnode lock even when the
+ * caller already held it (the !locked case is now honored there too).
+ */
+int
+nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr,
+    nandfs_daddr_t *pblocknr)
+{
+	struct nandfs_node *dat_node;
+	struct nandfs_dat_entry *entry;
+	struct buf *bp;
+	nandfs_lbn_t ldatblknr;
+	uint32_t entry_in_block;
+	int locked, error;
+
+	if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) {
+		*pblocknr = vblocknr;
+		return (0);
+	}
+
+	/* only translate valid vblocknrs */
+	if (vblocknr == 0)
+		return (0);
+
+	dat_node = node->nn_nandfsdev->nd_dat_node;
+	nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr,
+	    &entry_in_block);
+
+	/* Take the DAT lock only if the caller does not hold it already. */
+	locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node));
+	if (!locked)
+		VOP_LOCK(NTOV(dat_node), LK_SHARED);
+	error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp);
+	if (error) {
+		DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n",
+		    (uintmax_t)ldatblknr));
+		brelse(bp);
+		if (!locked)
+			VOP_UNLOCK(NTOV(dat_node), 0);
+		return (error);
+	}
+
+	/* Get our translation */
+	entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block;
+	DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n",
+	    entry, bp->b_data, entry_in_block));
+	DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n",
+	    (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr,
+	    (uintmax_t)entry->de_start, (uintmax_t)entry->de_end));
+
+	*pblocknr = entry->de_blocknr;
+	brelse(bp);
+	if (!locked)
+		VOP_UNLOCK(NTOV(dat_node), 0);
+
+	MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block ||
+	    *pblocknr == 0);
+
+	return (0);
+}
+
+/*
+ * Return non-zero iff the segment summary carries the NANDFS
+ * segment-summary magic number.
+ */
+int
+nandfs_segsum_valid(struct nandfs_segment_summary *segsum)
+{
+
+	if (segsum->ss_magic != NANDFS_SEGSUM_MAGIC)
+		return (0);
+	return (1);
+}
+
+/*
+ * Read the segment summary located at device block "blocknr" into
+ * *segsum and validate its magic.  Returns 0, a read error, or EINVAL
+ * on a bad magic.
+ */
+int
+nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr,
+    struct nandfs_segment_summary *segsum)
+{
+	struct buf *bp;
+	int error;
+
+	DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n",
+	    (uintmax_t)blocknr));
+
+	error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp);
+	if (error)
+		return (error);
+
+	memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary));
+	brelse(bp);
+
+	if (!nandfs_segsum_valid(segsum)) {
+		DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__,
+		    blocknr));
+		return (EINVAL);
+	}
+
+	return (error);
+}
+
+/*
+ * Load and CRC-check the super root block of the partial segment at
+ * "pseg" (the super root occupies the last block of the pseg).  On
+ * success the super root is copied into nandfsdev->nd_super_root.
+ * Returns ENOENT if the pseg carries no super root, EINVAL on a CRC
+ * mismatch, or a read error.
+ */
+static int
+nandfs_load_super_root(struct nandfs_device *nandfsdev,
+    struct nandfs_segment_summary *segsum, uint64_t pseg)
+{
+	struct nandfs_super_root super_root;
+	struct buf *bp;
+	uint64_t blocknr;
+	uint32_t super_root_crc, comp_crc;
+	int off, error;
+
+	/* Check if there is a superroot */
+	if ((segsum->ss_flags & NANDFS_SS_SR) == 0) {
+		DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__,
+		    pseg));
+		return (ENOENT);
+	}
+
+	/* Get our super root, located at the end of the pseg */
+	blocknr = pseg + segsum->ss_nblocks - 1;
+	DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr));
+
+	error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp);
+	if (error)
+		return (error);
+
+	memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root));
+	brelse(bp);
+
+	/* Check super root CRC; the stored sum field is excluded from it. */
+	super_root_crc = super_root.sr_sum;
+	off = sizeof(super_root.sr_sum);
+	comp_crc = crc32((uint8_t *)&super_root + off,
+	    NANDFS_SR_BYTES - off);
+
+	if (super_root_crc != comp_crc) {
+		DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n",
+		    __func__, super_root_crc, comp_crc));
+		return (EINVAL);
+	}
+
+	nandfsdev->nd_super_root = super_root;
+	DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__));
+
+	return (0);
+}
+
+/*
+ * Search for the last super root recorded.
+ *
+ * Starting from the last pseg noted in the superblock, walk the chain
+ * of partial segments forward (following ss_next across segment
+ * boundaries) while sequence/creation numbers keep increasing, and
+ * remember the newest valid super root found, updating nd_super.
+ * Returns 0 if at least one super root was found, else the last error.
+ */
+int
+nandfs_search_super_root(struct nandfs_device *nandfsdev)
+{
+	struct nandfs_super_block *super;
+	struct nandfs_segment_summary segsum;
+	uint64_t seg_start, seg_end, cno, seq, create, pseg;
+	uint64_t segnum;
+	int error, found;
+
+	error = found = 0;
+
+	/* Search for last super root */
+	pseg = nandfsdev->nd_super.s_last_pseg;
+	segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
+
+	cno = nandfsdev->nd_super.s_last_cno;
+	create = seq = 0;
+	DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
+	    (uintmax_t)pseg));
+
+	for (;;) {
+		error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
+		if (error)
+			break;
+
+		/* Older log than what we already saw: walk is done. */
+		if (segsum.ss_seq < seq || segsum.ss_create < create)
+			break;
+
+		/* Try to load super root */
+		if (segsum.ss_flags & NANDFS_SS_SR) {
+			error = nandfs_load_super_root(nandfsdev, &segsum, pseg);
+			if (error)
+				break; /* confused */
+			found = 1;
+
+			super = &nandfsdev->nd_super;
+			nandfsdev->nd_last_segsum = segsum;
+			super->s_last_pseg = pseg;
+			super->s_last_cno = cno++;
+			super->s_last_seq = segsum.ss_seq;
+			super->s_state = NANDFS_VALID_FS;
+			seq = segsum.ss_seq;
+			create = segsum.ss_create;
+		} else {
+			seq = segsum.ss_seq;
+			create = segsum.ss_create;
+		}
+
+		/* Calculate next partial segment location */
+		pseg += segsum.ss_nblocks;
+		DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
+		    (uintmax_t)pseg));
+
+		/* Did we reach the end of the segment? if so, go to the next */
+		nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
+		    &seg_end);
+		if (pseg >= seg_end) {
+			pseg = segsum.ss_next;
+			DPRINTF(VOLUMES,
+			    (" partial seg oor next is %jx[%jx - %jx]\n",
+			    (uintmax_t)pseg, (uintmax_t)seg_start,
+			    (uintmax_t)seg_end));
+		}
+		segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
+	}
+
+	/* Errors are forgiven once a valid super root has been found. */
+	if (error && !found)
+		return (error);
+
+	return (0);
+}
+
+/*
+ * Allocate a fresh vnode + nandfs_node pair for inode "ino" and
+ * cross-link them.  With a mount point (nmp != NULL) the vnode uses
+ * the regular vnode ops and is inserted into the mount; otherwise it
+ * is a system vnode.  The vnode is returned exclusively locked in
+ * *nodep.  "inode", if given, seeds the in-core inode.
+ *
+ * NOTE(review): on insmntque() failure the function returns without
+ * undoing NANDFS_WRITELOCK() taken above — verify whether that lock
+ * must be released on this path.
+ */
+int
+nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
+    uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
+{
+	struct nandfs_node *node;
+	struct vnode *nvp;
+	struct mount *mp;
+	int error;
+
+	*nodep = NULL;
+
+	/* Associate with mountpoint if present */
+	if (nmp) {
+		mp = nmp->nm_vfs_mountp;
+		error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
+		if (error) {
+			return (error);
+		}
+	} else {
+		mp = NULL;
+		error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
+		    &nvp);
+		if (error) {
+			return (error);
+		}
+	}
+
+	if (mp)
+		NANDFS_WRITELOCK(nandfsdev);
+
+	DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n",
+	    __func__, (uintmax_t)ino, nvp));
+	/* Lock node */
+	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
+
+	if (mp) {
+		error = insmntque(nvp, mp);
+		if (error != 0) {
+			*nodep = NULL;
+			return (error);
+		}
+	}
+
+	node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);
+
+	/* Crosslink */
+	node->nn_vnode = nvp;
+	nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
+	node->nn_nmp = nmp;
+	node->nn_nandfsdev = nandfsdev;
+	nvp->v_data = node;
+
+	/* Initiase NANDFS node */
+	node->nn_ino = ino;
+	if (inode != NULL)
+		node->nn_inode = *inode;
+
+	nandfs_vinit(nvp, ino);
+
+	/* Return node */
+	*nodep = node;
+	DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
+	    __func__, (uintmax_t)ino, nvp, *nodep));
+
+	return (0);
+}
+
+/*
+ * Look up (or create) the in-core node for inode "ino" on mount "nmp".
+ * First consults the vfs hash; on a miss the on-media inode is read
+ * from the mount's ifile, a new node is built via nandfs_get_node_raw()
+ * and inserted into the hash.  Returns the node locked in *nodep.
+ * Only the root inode and inodes >= NANDFS_ATIME_INO are served here;
+ * system inodes are rejected with ENOENT.
+ */
+int
+nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
+    struct nandfs_node **nodep)
+{
+	struct nandfs_device *nandfsdev;
+	struct nandfs_inode inode, *entry;
+	struct vnode *nvp, *vpp;
+	struct thread *td;
+	struct buf *bp;
+	uint64_t ivblocknr;
+	uint32_t entry_in_block;
+	int error;
+
+	/* Look up node in hash table */
+	td = curthread;
+	*nodep = NULL;
+
+	if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
+		printf("nandfs_get_node: system ino %"PRIu64" not in mount "
+		    "point!\n", ino);
+		return (ENOENT);
+	}
+
+	error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
+	    NULL, NULL);
+	if (error)
+		return (error);
+
+	if (nvp != NULL) {
+		*nodep = (struct nandfs_node *)nvp->v_data;
+		return (0);
+	}
+
+	/* Look up inode structure in mountpoints ifile */
+	nandfsdev = nmp->nm_nandfsdev;
+	nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
+	    &entry_in_block);
+
+	VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
+	error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
+		return (ENOENT);
+	}
+
+	/* Get inode entry */
+	entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
+	memcpy(&inode, entry, sizeof(struct nandfs_inode));
+	brelse(bp);
+	VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
+
+	/* Get node */
+	error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep);
+	if (error) {
+		*nodep = NULL;
+		return (error);
+	}
+
+	/*
+	 * NOTE(review): on vfs_hash_insert() failure the freshly created
+	 * node is not disposed here — confirm who reclaims it.
+	 */
+	nvp = (*nodep)->nn_vnode;
+	error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
+	if (error) {
+		*nodep = NULL;
+		return (error);
+	}
+
+	return (error);
+}
+
+/*
+ * Free the in-core node referenced by *nodep (NULL is tolerated) and
+ * detach it from its vnode.  *nodep is cleared on return; the vnode
+ * itself is reclaimed by the VFS, not here.
+ */
+void
+nandfs_dispose_node(struct nandfs_node **nodep)
+{
+	struct nandfs_node *node;
+	struct vnode *vp;
+
+	/* Protect against rogue values */
+	node = *nodep;
+	if (!node) {
+		return;
+	}
+	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));
+
+	vp = NTOV(node);
+	vp->v_data = NULL;
+
+	/* Free our associated memory */
+	uma_zfree(nandfs_node_zone, node);
+
+	*nodep = NULL;
+}
+
+/*
+ * Linear scan of directory "dvp" for an entry named name[0..namelen).
+ * On a hit, *found is set, *ino receives the inode number and *off the
+ * byte offset of the entry.  Returns ENOTDIR for non-directories, EIO
+ * on read errors, 0 otherwise (check *found for the result).
+ *
+ * NOTE(review): a corrupted entry with rec_len == 0 would stall this
+ * loop — presumably entries are validated elsewhere; confirm.
+ */
+int
+nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
+    uint64_t *ino, int *found, uint64_t *off)
+{
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfs_dir_entry *ndirent;
+	struct buf *bp;
+	uint64_t file_size, diroffset, blkoff;
+	uint64_t blocknr;
+	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+	uint8_t *pos, name_len;
+	int error;
+
+	*found = 0;
+
+	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
+	if (dvp->v_type != VDIR) {
+		return (ENOTDIR);
+	}
+
+	/* Get directory filesize */
+	file_size = dir_node->nn_inode.i_size;
+
+	/* Walk the directory */
+	diroffset = 0;
+	blocknr = 0;
+	blkoff = 0;
+	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (EIO);
+	}
+
+	while (diroffset < file_size) {
+		/* Crossed a block boundary: fetch the next block. */
+		if (blkoff >= blocksize) {
+			blkoff = 0; blocknr++;
+			brelse(bp);
+			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
+			    &bp);
+			if (error) {
+				brelse(bp);
+				return (EIO);
+			}
+		}
+
+		/* Read in one dirent */
+		pos = (uint8_t *) bp->b_data + blkoff;
+		ndirent = (struct nandfs_dir_entry *) pos;
+		name_len = ndirent->name_len;
+
+		if ((name_len == namelen) &&
+		    (strncmp(name, ndirent->name, name_len) == 0) &&
+		    (ndirent->inode != 0)) {
+			*ino = ndirent->inode;
+			*off = diroffset;
+			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
+			    name_len, ndirent->name, *ino));
+			*found = 1;
+			break;
+		}
+
+		/* Advance */
+		diroffset += ndirent->rec_len;
+		blkoff += ndirent->rec_len;
+	}
+	brelse(bp);
+
+	return (error);
+}
+
+/*
+ * Fill a userland-visible nandfs_fsinfo snapshot: on-media fsdata,
+ * the current superblock and the device name the mount came from.
+ * Always returns 0.
+ */
+int
+nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
+{
+	struct nandfs_device *fsdev;
+
+	fsdev = nmp->nm_nandfsdev;
+
+	memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
+	memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super));
+	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev),
+	    "%s", nmp->nm_vfs_mountp->mnt_stat.f_mntfromname);
+
+	return (0);
+}
+
+/*
+ * Initialize a fresh on-media inode image: zero sizes/blocks, stamp
+ * ctime/mtime with the current time, set the mode and a link count of
+ * 1 (2 for directories, for "." plus the parent's entry).
+ */
+void
+nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
+{
+	struct timespec ts;
+
+	vfs_timestamp(&ts);
+
+	inode->i_blocks = 0;
+	inode->i_size = 0;
+	inode->i_ctime = ts.tv_sec;
+	inode->i_ctime_nsec = ts.tv_nsec;
+	inode->i_mtime = ts.tv_sec;
+	inode->i_mtime_nsec = ts.tv_nsec;
+	inode->i_mode = mode;
+	inode->i_links_count = 1;
+	if (S_ISDIR(mode))
+		inode->i_links_count = 2;
+	inode->i_flags = 0;
+
+	inode->i_special = 0;
+	memset(inode->i_db, 0, sizeof(inode->i_db));
+	memset(inode->i_ib, 0, sizeof(inode->i_ib));
+}
+
+/*
+ * Scrub an inode image that no longer owns any blocks (asserted).
+ */
+void
+nandfs_inode_destroy(struct nandfs_inode *inode)
+{
+
+	MPASS(inode->i_blocks == 0);
+	bzero(inode, sizeof(*inode));
+}
+
+/*
+ * Heuristic "filesystem full" check: returns 1 when the dirty-buffer
+ * backlog plus a ten-segment safety margin would exceed the space in
+ * the remaining clean segments (one segment is held in reserve).
+ */
+int
+nandfs_fs_full(struct nandfs_device *nffsdev)
+{
+	uint64_t space, bps;
+
+	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
+	space = (nffsdev->nd_clean_segs - 1) * bps;
+
+	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
+	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));
+
+	if (nffsdev->nd_dirty_bufs + (10 * bps) >= space)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Mark a buffer dirty for the segment constructor: set B_MANAGED, do a
+ * delayed write and account it in nd_dirty_bufs.  Buffers already
+ * gathered or already managed+delwri are released untouched.  Fails
+ * with ENOSPC when the fs is full, unless the node is a system node or
+ * "force" is set.  With "dirty_meta", the covering bmap (indirect)
+ * blocks are dirtied too.  Wakes the syncer once the backlog reaches
+ * nandfs_max_dirty_segs segments worth of buffers.
+ */
+static int
+_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
+{
+	struct nandfs_device *nffsdev;
+	struct nandfs_node *node;
+	uint64_t ino, bps;
+
+	if (NANDFS_ISGATHERED(bp)) {
+		bqrelse(bp);
+		return (0);
+	}
+	if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) {
+		bqrelse(bp);
+		return (0);
+	}
+
+	node = VTON(bp->b_vp);
+	nffsdev = node->nn_nandfsdev;
+	DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
+	ino = node->nn_ino;
+
+	if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
+		brelse(bp);
+		return (ENOSPC);
+	}
+
+	bp->b_flags |= B_MANAGED;
+	bdwrite(bp);
+
+	nandfs_dirty_bufs_increment(nffsdev);
+
+	KASSERT((bp->b_vp), ("vp missing for bp"));
+	KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
+	    ("bp vblk is 0"));
+
+	/*
+	 * To maintain consistency of FS we need to force making
+	 * meta buffers dirty, even if free space is low.
+	 */
+	if (dirty_meta && ino != NANDFS_GC_INO)
+		nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
+
+	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
+
+	if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
+		mtx_lock(&nffsdev->nd_sync_mtx);
+		if (nffsdev->nd_syncing == 0) {
+			DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
+			nffsdev->nd_syncing = 1;
+			wakeup(&nffsdev->nd_syncing);
+		}
+		mtx_unlock(&nffsdev->nd_sync_mtx);
+	}
+
+	return (0);
+}
+
+/*
+ * Dirty a data buffer, also dirtying its covering metadata blocks.
+ */
+int
+nandfs_dirty_buf(struct buf *bp, int force)
+{
+
+	return (_nandfs_dirty_buf(bp, 1, force));
+}
+
+/*
+ * Dirty a metadata buffer without recursing into its bmap blocks.
+ */
+int
+nandfs_dirty_buf_meta(struct buf *bp, int force)
+{
+
+	return (_nandfs_dirty_buf(bp, 0, force));
+}
+
+/*
+ * Undo _nandfs_dirty_buf(): strip B_DELWRI/B_MANAGED, drop the dirty
+ * accounting and release the buffer.  The buffer must be held.
+ */
+void
+nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
+{
+
+	BUF_ASSERT_HELD(bp);
+
+	if (bp->b_flags & B_DELWRI) {
+		bp->b_flags &= ~(B_DELWRI|B_MANAGED);
+		nandfs_dirty_bufs_decrement(nffsdev);
+	}
+	/*
+	 * Since it is now being written, we can clear its deferred write flag.
+	 */
+	bp->b_flags &= ~B_DEFERRED;
+
+	brelse(bp);
+}
+
+/*
+ * Convenience wrapper: undirty a buffer using the device owning its
+ * vnode's node.
+ */
+void
+nandfs_undirty_buf(struct buf *bp)
+{
+	struct nandfs_node *node;
+
+	node = VTON(bp->b_vp);
+
+	nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp);
+}
+
+/*
+ * Stash a virtual block number in the buffer's b_fsprivate1 slot.
+ */
+void
+nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
+{
+
+	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
+	*vblk = blocknr;
+}
+
+/*
+ * Retrieve the virtual block number stored in b_fsprivate1.
+ */
+nandfs_daddr_t
+nandfs_vblk_get(struct buf *bp)
+{
+
+	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
+	return (*vblk);
+}
+
+/*
+ * Set flag bits in the buffer's b_fsprivate3 word (used as a nandfs
+ * per-buffer flag field).
+ */
+void
+nandfs_buf_set(struct buf *bp, uint32_t bits)
+{
+	uintptr_t flags;
+
+	flags = (uintptr_t)bp->b_fsprivate3;
+	flags |= (uintptr_t)bits;
+	bp->b_fsprivate3 = (void *)flags;
+}
+
+/*
+ * Clear flag bits in the buffer's b_fsprivate3 flag word.
+ */
+void
+nandfs_buf_clear(struct buf *bp, uint32_t bits)
+{
+	uintptr_t flags;
+
+	flags = (uintptr_t)bp->b_fsprivate3;
+	flags &= ~(uintptr_t)bits;
+	bp->b_fsprivate3 = (void *)flags;
+}
+
+/*
+ * Test whether any of the given flag bits are set in the buffer's
+ * b_fsprivate3 flag word; returns 1 if so, else 0.
+ */
+int
+nandfs_buf_check(struct buf *bp, uint32_t bits)
+{
+
+	return (((uintptr_t)bp->b_fsprivate3 & (uintptr_t)bits) != 0);
+}
+
+/*
+ * Erase "size" bytes of media at "offset".  On real NAND this is a
+ * GEOM delete; otherwise the range is overwritten with 0xff in
+ * MAXBSIZE chunks.  size must be a multiple of the erase size.
+ *
+ * Fix: the overwrite loop iterated size / MAXBSIZE times, which is
+ * zero when size < MAXBSIZE, silently erasing nothing; it now counts
+ * in units of the actual chunk size (size / read_size).  NOTE(review):
+ * a trailing partial chunk when size is not a multiple of MAXBSIZE is
+ * still not covered — presumably the erase size divides MAXBSIZE;
+ * confirm against the format parameters.
+ */
+int
+nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
+{
+	struct buf *bp;
+	int read_size, error, i;
+
+	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
+	    __func__, offset, size));
+
+	MPASS(size % fsdev->nd_erasesize == 0);
+
+	if (fsdev->nd_is_nand) {
+		error = g_delete_data(fsdev->nd_gconsumer, offset, size);
+		return (error);
+	}
+
+	if (size > MAXBSIZE)
+		read_size = MAXBSIZE;
+	else
+		read_size = size;
+
+	error = 0;
+	for (i = 0; i < size / read_size; i++) {
+		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
+		    read_size, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		memset(bp->b_data, 0xff, read_size);
+		error = bwrite(bp);
+		if (error) {
+			nandfs_error("%s: err:%d from bwrite\n",
+			    __func__, error);
+			return (error);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Return non-zero iff the vnode is held shared or exclusive by the
+ * current thread's view of VOP_ISLOCKED (LK_EXCLOTHER counts as not
+ * locked for our purposes).
+ */
+int
+nandfs_vop_islocked(struct vnode *vp)
+{
+	int islocked;
+
+	islocked = VOP_ISLOCKED(vp);
+	return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED);
+}
+
+/*
+ * Convert a filesystem block number to a DEV_BSIZE disk block number.
+ */
+nandfs_daddr_t
+nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block)
+{
+
+	return (btodb(block * fsdev->nd_blocksize));
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_subr.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_subr.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,238 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD: head/sys/fs/nandfs/nandfs_subr.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _FS_NANDFS_NANDFS_SUBR_H_
+#define _FS_NANDFS_NANDFS_SUBR_H_
+
+struct nandfs_mdt;
+
+struct nandfs_alloc_request
+{
+ uint64_t entrynum;
+ struct buf *bp_desc;
+ struct buf *bp_bitmap;
+ struct buf *bp_entry;
+};
+
+/* Segment creation */
+void nandfs_wakeup_wait_sync(struct nandfs_device *, int);
+int nandfs_segment_constructor(struct nandfsmount *, int);
+int nandfs_sync_file(struct vnode *);
+
+/* Basic calculators */
+uint64_t nandfs_get_segnum_of_block(struct nandfs_device *, nandfs_daddr_t);
+void nandfs_get_segment_range(struct nandfs_device *, uint64_t, uint64_t *,
+ uint64_t *);
+void nandfs_calc_mdt_consts(struct nandfs_device *, struct nandfs_mdt *, int);
+
+/* Log reading / volume helpers */
+int nandfs_search_super_root(struct nandfs_device *);
+
+/* Reading */
+int nandfs_dev_bread(struct nandfs_device *, nandfs_daddr_t, struct ucred *,
+ int, struct buf **);
+int nandfs_bread(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bread_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bdestroy(struct nandfs_node *, nandfs_daddr_t);
+int nandfs_bcreate(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bcreate_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *,
+ int, struct buf **);
+int nandfs_bread_create(struct nandfs_node *, nandfs_lbn_t, struct ucred *,
+ int, struct buf **);
+
+/* vtop operations */
+int nandfs_vtop(struct nandfs_node *, nandfs_daddr_t, nandfs_daddr_t *);
+
+/* Node action implementations */
+int nandfs_vinit(struct vnode *, uint64_t);
+int nandfs_get_node(struct nandfsmount *, uint64_t, struct nandfs_node **);
+int nandfs_get_node_raw(struct nandfs_device *, struct nandfsmount *, uint64_t,
+ struct nandfs_inode *, struct nandfs_node **);
+void nandfs_dispose_node(struct nandfs_node **);
+
+void nandfs_itimes(struct vnode *);
+int nandfs_lookup_name_in_dir(struct vnode *, const char *, int, uint64_t *,
+ int *, uint64_t *);
+int nandfs_create_node(struct vnode *, struct vnode **, struct vattr *,
+ struct componentname *);
+void nandfs_delete_node(struct nandfs_node *);
+
+int nandfs_chsize(struct vnode *, u_quad_t, struct ucred *);
+int nandfs_dir_detach(struct nandfsmount *, struct nandfs_node *,
+ struct nandfs_node *, struct componentname *);
+int nandfs_dir_attach(struct nandfsmount *, struct nandfs_node *,
+ struct nandfs_node *, struct vattr *, struct componentname *);
+
+int nandfs_dirty_buf(struct buf *, int);
+int nandfs_dirty_buf_meta(struct buf *, int);
+int nandfs_fs_full(struct nandfs_device *);
+void nandfs_undirty_buf_fsdev(struct nandfs_device *, struct buf *);
+void nandfs_undirty_buf(struct buf *);
+
+void nandfs_clear_buf(struct buf *);
+void nandfs_buf_set(struct buf *, uint32_t);
+void nandfs_buf_clear(struct buf *, uint32_t);
+int nandfs_buf_check(struct buf *, uint32_t);
+
+int nandfs_find_free_entry(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *);
+int nandfs_find_entry(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *);
+int nandfs_alloc_entry(struct nandfs_mdt *, struct nandfs_alloc_request *);
+void nandfs_abort_entry(struct nandfs_alloc_request *);
+int nandfs_free_entry(struct nandfs_mdt *, struct nandfs_alloc_request *);
+int nandfs_get_entry_block(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *, uint32_t *, int);
+
+/* inode management */
+int nandfs_node_create(struct nandfsmount *, struct nandfs_node **, uint16_t);
+int nandfs_node_destroy(struct nandfs_node *);
+int nandfs_node_update(struct nandfs_node *);
+int nandfs_get_node_entry(struct nandfsmount *, struct nandfs_inode **,
+ uint64_t, struct buf **);
+void nandfs_mdt_trans_blk(struct nandfs_mdt *, uint64_t, uint64_t *,
+ uint64_t *, nandfs_lbn_t *, uint32_t *);
+
+/* vblock management */
+void nandfs_mdt_trans(struct nandfs_mdt *, uint64_t, nandfs_lbn_t *, uint32_t *);
+int nandfs_vblock_alloc(struct nandfs_device *, nandfs_daddr_t *);
+int nandfs_vblock_end(struct nandfs_device *, nandfs_daddr_t);
+int nandfs_vblock_assign(struct nandfs_device *, nandfs_daddr_t,
+ nandfs_lbn_t);
+int nandfs_vblock_free(struct nandfs_device *, nandfs_daddr_t);
+
+/* Checkpoint management */
+int nandfs_get_checkpoint(struct nandfs_device *, struct nandfs_node *,
+ uint64_t);
+int nandfs_set_checkpoint(struct nandfs_device *, struct nandfs_node *,
+ uint64_t, struct nandfs_inode *, uint64_t);
+
+/* Segment management */
+int nandfs_alloc_segment(struct nandfs_device *, uint64_t *);
+int nandfs_update_segment(struct nandfs_device *, uint64_t, uint32_t);
+int nandfs_free_segment(struct nandfs_device *, uint64_t);
+int nandfs_clear_segment(struct nandfs_device *, uint64_t);
+int nandfs_touch_segment(struct nandfs_device *, uint64_t);
+int nandfs_markgc_segment(struct nandfs_device *, uint64_t);
+
+int nandfs_bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, struct buf *);
+int nandfs_bmap_update_block(struct nandfs_node *, struct buf *, nandfs_lbn_t);
+int nandfs_bmap_update_dat(struct nandfs_node *, nandfs_daddr_t, struct buf *);
+int nandfs_bmap_dirty_blocks(struct nandfs_node *, struct buf *, int);
+int nandfs_bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t,
+ nandfs_lbn_t);
+int nandfs_bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *);
+
+/* dirent */
+int nandfs_add_dirent(struct vnode *, uint64_t, char *, long, uint8_t);
+int nandfs_remove_dirent(struct vnode *, struct nandfs_node *,
+ struct componentname *);
+int nandfs_update_dirent(struct vnode *, struct nandfs_node *,
+ struct nandfs_node *);
+int nandfs_init_dir(struct vnode *, uint64_t, uint64_t);
+int nandfs_update_parent_dir(struct vnode *, uint64_t);
+
+void nandfs_vblk_set(struct buf *, nandfs_daddr_t);
+nandfs_daddr_t nandfs_vblk_get(struct buf *);
+
+void nandfs_inode_init(struct nandfs_inode *, uint16_t);
+void nandfs_inode_destroy(struct nandfs_inode *);
+
+/* ioctl */
+int nandfs_get_seg_stat(struct nandfs_device *, struct nandfs_seg_stat *);
+int nandfs_chng_cpmode(struct nandfs_node *, struct nandfs_cpmode *);
+int nandfs_get_cpinfo_ioctl(struct nandfs_node *, struct nandfs_argv *);
+int nandfs_delete_cp(struct nandfs_node *, uint64_t start, uint64_t);
+int nandfs_make_snap(struct nandfs_device *, uint64_t *);
+int nandfs_delete_snap(struct nandfs_device *, uint64_t);
+int nandfs_get_cpstat(struct nandfs_node *, struct nandfs_cpstat *);
+int nandfs_get_segment_info_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_dat_vinfo_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_dat_bdescs_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_fsinfo(struct nandfsmount *, struct nandfs_fsinfo *);
+
+int nandfs_get_cpinfo(struct nandfs_node *, uint64_t, uint16_t,
+ struct nandfs_cpinfo *, uint32_t, uint32_t *);
+
+nandfs_lbn_t nandfs_get_maxfilesize(struct nandfs_device *);
+
+int nandfs_write_superblock(struct nandfs_device *);
+
+extern int nandfs_sync_interval;
+extern int nandfs_max_dirty_segs;
+extern int nandfs_cps_between_sblocks;
+
+struct buf *nandfs_geteblk(int, int);
+
+void nandfs_dirty_bufs_increment(struct nandfs_device *);
+void nandfs_dirty_bufs_decrement(struct nandfs_device *);
+
+int nandfs_start_cleaner(struct nandfs_device *);
+int nandfs_stop_cleaner(struct nandfs_device *);
+
+int nandfs_segsum_valid(struct nandfs_segment_summary *);
+int nandfs_load_segsum(struct nandfs_device *, nandfs_daddr_t,
+ struct nandfs_segment_summary *);
+int nandfs_get_segment_info(struct nandfs_device *, struct nandfs_suinfo *,
+ uint32_t, uint64_t);
+int nandfs_get_segment_info_filter(struct nandfs_device *,
+ struct nandfs_suinfo *, uint32_t, uint64_t, uint64_t *, uint32_t, uint32_t);
+int nandfs_get_dat_vinfo(struct nandfs_device *, struct nandfs_vinfo *,
+ uint32_t);
+int nandfs_get_dat_bdescs(struct nandfs_device *, struct nandfs_bdesc *,
+ uint32_t);
+
+#define NANDFS_VBLK_ASSIGNED 1
+
+#define NANDFS_IS_INDIRECT(bp) ((bp)->b_lblkno < 0)
+
+int nandfs_erase(struct nandfs_device *, off_t, size_t);
+
+#define NANDFS_VOP_ISLOCKED(vp) nandfs_vop_islocked((vp))
+int nandfs_vop_islocked(struct vnode *vp);
+
+nandfs_daddr_t nandfs_block_to_dblock(struct nandfs_device *, nandfs_lbn_t);
+
+#define DEBUG_MODE
+#if defined(DEBUG_MODE)
+#define nandfs_error panic
+#define nandfs_warning printf
+#elif defined(TEST_MODE)
+#define nandfs_error printf
+#define nandfs_warning printf
+#else
+#define nandfs_error(...)
+#define nandfs_warning(...)
+#endif
+
+#endif /* !_FS_NANDFS_NANDFS_SUBR_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_sufile.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_sufile.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,569 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_sufile.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+#define SU_USAGE_OFF(bp, offset) \
+ ((struct nandfs_segment_usage *)((bp)->b_data + offset))
+
+/*
+ * Locate segment 'seg's usage record inside the sufile: return the
+ * sufile block number in *blk and the byte offset within that block
+ * in *offset.  Always returns 0.
+ */
+static int
+nandfs_seg_usage_blk_offset(struct nandfs_device *fsdev, uint64_t seg,
+    uint64_t *blk, uint64_t *offset)
+{
+	uint64_t byteoff;
+	uint16_t susize;
+
+	susize = fsdev->nd_fsdata.f_segment_usage_size;
+
+	/* Usage records start past the header, padded to a record boundary. */
+	byteoff = roundup(sizeof(struct nandfs_sufile_header), susize);
+	byteoff += seg * susize;
+
+	*blk = byteoff / fsdev->nd_blocksize;
+	*offset = byteoff % fsdev->nd_blocksize;
+	return (0);
+}
+
+/*
+ * Allocate a new segment: scan the sufile for the first segment-usage
+ * entry with no flags set, starting just after the last allocated
+ * segment and wrapping around at f_nsegments.  On success the segment
+ * number is stored in *seg, the sufile header counters are updated and
+ * the touched buffers are marked dirty.  Caller must hold the sufile
+ * vnode lock.  Returns 0 on success, an errno value on I/O failure, or
+ * 1 when no free segment is available.
+ */
+int
+nandfs_alloc_segment(struct nandfs_device *fsdev, uint64_t *seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_sufile_header *su_header;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp_header, *bp;
+	uint64_t blk, vblk, offset, i, rest, nsegments;
+	uint16_t seg_size;
+	int error, found;
+
+	seg_size = fsdev->nd_fsdata.f_segment_usage_size;
+	nsegments = fsdev->nd_fsdata.f_nsegments;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	/* Read header buffer */
+	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
+	if (error) {
+		brelse(bp_header);
+		return (error);
+	}
+
+	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
+
+	/* Start scanning right after the last allocated segment. */
+	i = su_header->sh_last_alloc + 1;
+
+	found = 0;
+	bp = NULL;
+	while (!found) {
+		nandfs_seg_usage_blk_offset(fsdev, i, &blk, &offset);
+		/* blk == 0 means the record lives in the header block. */
+		if(blk != 0) {
+			error = nandfs_bmap_lookup(su_node, blk, &vblk);
+			if (error) {
+				nandfs_error("%s: cannot find vblk for blk "
+				    "blk:%jx\n", __func__, blk);
+				/* NOTE(review): bp_header is not released on
+				 * this error path — possible buffer leak. */
+				return (error);
+			}
+			/* Read existing block, or create it if unmapped. */
+			if (vblk)
+				error = nandfs_bread(su_node, blk, NOCRED, 0,
+				    &bp);
+			else
+				error = nandfs_bcreate(su_node, blk, NOCRED, 0,
+				    &bp);
+			if (error) {
+				nandfs_error("%s: cannot create/read "
+				    "vblk:%jx\n", __func__, vblk);
+				if (bp)
+					brelse(bp);
+				/* NOTE(review): bp_header is not released on
+				 * this error path either. */
+				return (error);
+			}
+
+			su_usage = SU_USAGE_OFF(bp, offset);
+		} else {
+			su_usage = SU_USAGE_OFF(bp_header, offset);
+			bp = bp_header;
+		}
+
+		/* Number of usage records left in this block. */
+		rest = (fsdev->nd_blocksize - offset) / seg_size;
+		/* Go through all su usage in block */
+		while (rest) {
+			/* When last, check start from beginning */
+			if (i == nsegments)
+				break;
+
+			/* A record with no flags set is a free segment. */
+			if (!su_usage->su_flags) {
+				su_usage->su_flags = 1;
+				found = 1;
+				break;
+			}
+			su_usage++;
+			i++;
+
+			/* If all checked return error */
+			if (i == su_header->sh_last_alloc) {
+				DPRINTF(SEG, ("%s: cannot allocate segment \n",
+				    __func__));
+				brelse(bp_header);
+				if (blk != 0)
+					brelse(bp);
+				return (1);
+			}
+			rest--;
+		}
+		if (!found) {
+			/* Otherwise read another block */
+			if (blk != 0)
+				brelse(bp);
+			/* Wrap around to segment 0 / the header block. */
+			if (i == nsegments) {
+				blk = 0;
+				i = 0;
+			} else
+				blk++;
+			offset = 0;
+		}
+	}
+
+	if (found) {
+		*seg = i;
+		su_header->sh_last_alloc = i;
+		su_header->sh_ncleansegs--;
+		su_header->sh_ndirtysegs++;
+
+		fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
+		    fsdev->nd_fsdata.f_blocks_per_segment;
+		fsdev->nd_clean_segs--;
+
+		/*
+		 * It is mostly called from syncer() so we want to force
+		 * making buf dirty.
+		 */
+		error = nandfs_dirty_buf(bp_header, 1);
+		if (error) {
+			if (bp && bp != bp_header)
+				brelse(bp);
+			return (error);
+		}
+		if (bp && bp != bp_header)
+			nandfs_dirty_buf(bp, 1);
+
+		DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)i));
+
+		return (0);
+	}
+
+	DPRINTF(SEG, ("%s: failed\n", __func__));
+
+	return (1);
+}
+
+/*
+ * Mark the sufile block holding 'seg's usage entry dirty so the syncer
+ * gathers and rewrites it soon.  Caller must hold the sufile vnode
+ * lock.  Returns 0 on success or an errno value from the read.
+ */
+int
+nandfs_touch_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		nandfs_error("%s: cannot preallocate new segment\n", __func__);
+		return (error);
+	} else
+		/* Force-dirty even when space is low. */
+		nandfs_dirty_buf(bp, 1);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+	return (error);
+}
+
+/*
+ * Account 'nblks' newly written blocks to segment 'seg': bump
+ * su_nblocks, stamp the modification time and force the DIRTY usage
+ * flag.  Caller must hold the sufile vnode lock.  Returns 0 on
+ * success or an errno value from the read.
+ */
+int
+nandfs_update_segment(struct nandfs_device *fsdev, uint64_t seg, uint32_t nblks)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		nandfs_error("%s: read block:%jx to update\n",
+		    __func__, blk);
+		brelse(bp);
+		return (error);
+	}
+
+	su_usage = SU_USAGE_OFF(bp, offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_flags = NANDFS_SEGMENT_USAGE_DIRTY;
+	su_usage->su_nblocks += nblks;
+
+	DPRINTF(SEG, ("%s: seg:%#jx inc:%#x cur:%#x\n", __func__,
+	    (uintmax_t)seg, nblks, su_usage->su_nblocks));
+
+	/* Force-dirty; segment accounting must not be dropped. */
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Return segment 'seg' to the free pool: zero its usage record, move
+ * it from the dirty to the clean count in the sufile header and update
+ * the superblock free-block estimate.  Caller must hold the sufile
+ * vnode lock.  Returns 0 on success or an errno value.
+ */
+int
+nandfs_free_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_sufile_header *su_header;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp_header, *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	/* Read su header */
+	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
+	if (error) {
+		brelse(bp_header);
+		return (error);
+	}
+
+	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	/* Read su usage block if other than su header block */
+	if (blk != 0) {
+		error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			brelse(bp_header);
+			return (error);
+		}
+	} else
+		bp = bp_header;
+
+	/* Reset su usage data */
+	su_usage = SU_USAGE_OFF(bp, offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_nblocks = 0;
+	su_usage->su_flags = 0;
+
+	/* Update clean/dirty counter in header */
+	su_header->sh_ncleansegs++;
+	su_header->sh_ndirtysegs--;
+
+	/*
+	 * Make the buffers dirty.  This is called by the cleaner, so
+	 * force dirtying even if little space is left on the device.
+	 */
+	nandfs_dirty_buf(bp_header, 1);
+	if (bp != bp_header)
+		nandfs_dirty_buf(bp, 1);
+
+	/* Update free block count */
+	fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
+	    fsdev->nd_fsdata.f_blocks_per_segment;
+	fsdev->nd_clean_segs++;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	return (0);
+}
+
+/*
+ * Mark segment 'seg' with NANDFS_SEGMENT_USAGE_ERROR (e.g. after a
+ * failed erase) so it is never allocated again.  Caller must hold the
+ * sufile vnode lock.  Returns 0 on success or an errno value.
+ */
+static int
+nandfs_bad_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	su_usage = SU_USAGE_OFF(bp, offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_flags = NANDFS_SEGMENT_USAGE_ERROR;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	/* Force-dirty; the bad-block mark must reach the media. */
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Flag segment 'seg' as being garbage-collected (USAGE_GC).  Unlike
+ * the other sufile updaters this takes the sufile vnode lock itself,
+ * and the buffer is only released, not dirtied — the flag appears to
+ * live in the cached block until written by other means (NOTE(review):
+ * confirm the in-core-only update is intentional).  Returns 0 on
+ * success or an errno value.
+ */
+int
+nandfs_markgc_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(su_node), LK_EXCLUSIVE);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(su_node), 0);
+		return (error);
+	}
+
+	su_usage = SU_USAGE_OFF(bp, offset);
+	/* A segment must not be GC-marked twice. */
+	MPASS((su_usage->su_flags & NANDFS_SEGMENT_USAGE_GC) == 0);
+	su_usage->su_flags |= NANDFS_SEGMENT_USAGE_GC;
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(su_node), 0);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	return (0);
+}
+
+/*
+ * Physically erase segment 'seg' and return it to the free pool.  If
+ * the erase fails the segment is marked bad instead of freed.  Returns
+ * 0 on success or an errno value.
+ */
+int
+nandfs_clear_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	uint64_t offset, segsize;
+	uint32_t bps, bsize;
+	int error = 0;
+
+	bps = fsdev->nd_fsdata.f_blocks_per_segment;
+	bsize = fsdev->nd_blocksize;
+	segsize = bsize * bps;
+	/* Translate the segment number into a byte offset on the device. */
+	nandfs_get_segment_range(fsdev, seg, &offset, NULL);
+	offset *= bsize;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	/* Erase it and mark it bad when fail */
+	if (nandfs_erase(fsdev, offset, segsize))
+		error = nandfs_bad_segment(fsdev, seg);
+
+	if (error)
+		return (error);
+
+	/* Mark it free */
+	error = nandfs_free_segment(fsdev, seg);
+
+	return (error);
+}
+
+/*
+ * Fill *nss with segment statistics taken from the sufile header and
+ * device state.  Returns 0 on success; on read failure it returns -1
+ * (NOTE(review): not an errno value, unlike most siblings).
+ */
+int
+nandfs_get_seg_stat(struct nandfs_device *nandfsdev,
+    struct nandfs_seg_stat *nss)
+{
+	struct nandfs_sufile_header *suhdr;
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	int err;
+
+	su_node = nandfsdev->nd_su_node;
+
+	NANDFS_WRITELOCK(nandfsdev);
+	VOP_LOCK(NTOV(su_node), LK_SHARED);
+	err = nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp);
+	if (err) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(su_node), 0);
+		NANDFS_WRITEUNLOCK(nandfsdev);
+		return (-1);
+	}
+
+	suhdr = (struct nandfs_sufile_header *)bp->b_data;
+	nss->nss_nsegs = nandfsdev->nd_fsdata.f_nsegments;
+	nss->nss_ncleansegs = suhdr->sh_ncleansegs;
+	nss->nss_ndirtysegs = suhdr->sh_ndirtysegs;
+	nss->nss_ctime = 0;
+	nss->nss_nongc_ctime = nandfsdev->nd_ts.tv_sec;
+	nss->nss_prot_seq = nandfsdev->nd_seg_sequence;
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(su_node), 0);
+
+	NANDFS_WRITEUNLOCK(nandfsdev);
+
+	return (0);
+}
+
+/*
+ * Ioctl backend: gather up to nv_nmembs segment-usage records starting
+ * at nv_index and copy them out to the user buffer at nv_base.
+ * nv_nmembs is bounded by NANDFS_SEGMENTS_MAX to cap the kernel
+ * allocation.  Returns 0 or an errno value.
+ */
+int
+nandfs_get_segment_info_ioctl(struct nandfs_device *fsdev,
+    struct nandfs_argv *nargv)
+{
+	struct nandfs_suinfo *nsi;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_SEGMENTS_MAX)
+		return (EINVAL);
+
+	nsi = malloc(sizeof(struct nandfs_suinfo) * nargv->nv_nmembs,
+	    M_NANDFSTEMP, M_WAITOK | M_ZERO);
+
+	error = nandfs_get_segment_info(fsdev, nsi, nargv->nv_nmembs,
+	    nargv->nv_index);
+
+	if (error == 0)
+		error = copyout(nsi, (void *)(uintptr_t)nargv->nv_base,
+		    sizeof(struct nandfs_suinfo) * nargv->nv_nmembs);
+
+	free(nsi, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Convenience wrapper around nandfs_get_segment_info_filter() with no
+ * flag filtering and no output count.
+ */
+int
+nandfs_get_segment_info(struct nandfs_device *fsdev, struct nandfs_suinfo *nsi,
+    uint32_t nmembs, uint64_t segment)
+{
+
+	return (nandfs_get_segment_info_filter(fsdev, nsi, nmembs, segment,
+	    NULL, 0, 0));
+}
+
+/*
+ * Copy up to 'nmembs' segment-usage records into 'nsi', starting at
+ * segment number 'segment'.  A record is skipped when any of its flags
+ * match 'nfilter' (negative filter) or when 'filter' is nonzero and
+ * none of its flags match it (positive filter).  The current and next
+ * write segments are reported with USAGE_ACTIVE added.  If 'nsegs' is
+ * non-NULL it receives the number of records emitted.  Returns 0 or an
+ * errno value from the sufile read.
+ */
+int
+nandfs_get_segment_info_filter(struct nandfs_device *fsdev,
+    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment,
+    uint64_t *nsegs, uint32_t filter, uint32_t nfilter)
+{
+	struct nandfs_segment_usage *su;
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	uint64_t curr, blocknr, blockoff, i;
+	uint32_t flags;
+	int err = 0;
+
+	/* Sentinel so the first iteration always reads a block. */
+	curr = ~(0);
+
+	/* Keep the segment constructor out while we walk the sufile. */
+	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
+	su_node = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(su_node), LK_SHARED);
+
+	bp = NULL;
+	if (nsegs != NULL)
+		*nsegs = 0;
+	/* 'i' counts emitted records; 'segment' advances every pass. */
+	for (i = 0; i < nmembs; segment++) {
+		if (segment == fsdev->nd_fsdata.f_nsegments)
+			break;
+
+		nandfs_seg_usage_blk_offset(fsdev, segment, &blocknr,
+		    &blockoff);
+
+		/* Re-read only when we cross into a different sufile block. */
+		if (i == 0 || curr != blocknr) {
+			if (bp != NULL)
+				brelse(bp);
+			err = nandfs_bread(su_node, blocknr, NOCRED,
+			    0, &bp);
+			if (err) {
+				goto out;
+			}
+			curr = blocknr;
+		}
+
+		su = SU_USAGE_OFF(bp, blockoff);
+		flags = su->su_flags;
+		if (segment == fsdev->nd_seg_num ||
+		    segment == fsdev->nd_next_seg_num)
+			flags |= NANDFS_SEGMENT_USAGE_ACTIVE;
+
+		if (nfilter != 0 && (flags & nfilter) != 0)
+			continue;
+		if (filter != 0 && (flags & filter) == 0)
+			continue;
+
+		nsi->nsi_num = segment;
+		nsi->nsi_lastmod = su->su_lastmod;
+		nsi->nsi_blocks = su->su_nblocks;
+		nsi->nsi_flags = flags;
+		nsi++;
+		i++;
+		if (nsegs != NULL)
+			(*nsegs)++;
+	}
+
+out:
+	if (bp != NULL)
+		brelse(bp);
+	VOP_UNLOCK(NTOV(su_node), 0);
+	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+	return (err);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_vfsops.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,1590 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_vfsops.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <machine/_inttypes.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure");
+
+/*
+ * Mark a metadata vnode as a system file: set VV_SYSTEM and trade the
+ * caller's locked reference (vput) for a plain held reference (vref)
+ * so the vnode stays resident while the filesystem is mounted.
+ */
+#define NANDFS_SET_SYSTEMFILE(vp) { \
+	(vp)->v_vflag |= VV_SYSTEM; \
+	vref(vp); \
+	vput(vp); }
+
+/*
+ * Undo NANDFS_SET_SYSTEMFILE: relock the vnode, clear VV_SYSTEM and
+ * destroy it; the vnode must have no dirty buffers left.
+ */
+#define NANDFS_UNSET_SYSTEMFILE(vp) { \
+	VOP_LOCK(vp, LK_EXCLUSIVE); \
+	MPASS(vp->v_bufobj.bo_dirty.bv_cnt == 0); \
+	(vp)->v_vflag &= ~VV_SYSTEM; \
+	vgone(vp); \
+	vput(vp); }
+
+/* Globals */
+struct _nandfs_devices nandfs_devices;
+
+/* Parameters */
+int nandfs_verbose = 0;
+
+/* Fetch loader tunables once VFS is up (see SYSINIT below). */
+static void
+nandfs_tunable_init(void *arg)
+{
+
+	TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose);
+}
+SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL);
+
+static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem");
+static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0,
+ "NANDFS mountpoints");
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, "");
+
+/*
+ * Run-time tunables (vfs.nandfs.*).
+ * NOTE(review): these variables are declared 'int' but exported with
+ * SYSCTL_UINT — confirm whether SYSCTL_INT (or unsigned declarations)
+ * was intended.
+ */
+#define NANDFS_CONSTR_INTERVAL	5
+int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW,
+    &nandfs_sync_interval, 0, "");
+
+#define NANDFS_MAX_DIRTY_SEGS	5
+int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW,
+    &nandfs_max_dirty_segs, 0, "");
+
+#define NANDFS_CPS_BETWEEN_SBLOCKS 5
+int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS; /* write superblock every 5 checkpoints */
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW,
+    &nandfs_cps_between_sblocks, 0, "");
+
+#define NANDFS_CLEANER_ENABLE 1
+int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE;
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW,
+    &nandfs_cleaner_enable, 0, "");
+
+#define NANDFS_CLEANER_INTERVAL 5
+int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL;
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW,
+    &nandfs_cleaner_interval, 0, "");
+
+#define NANDFS_CLEANER_SEGMENTS 5
+int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS;
+SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW,
+    &nandfs_cleaner_segments, 0, "");
+
+static int nandfs_mountfs(struct vnode *devvp, struct mount *mp);
+static vfs_mount_t nandfs_mount;
+static vfs_root_t nandfs_root;
+static vfs_statfs_t nandfs_statfs;
+static vfs_unmount_t nandfs_unmount;
+static vfs_vget_t nandfs_vget;
+static vfs_sync_t nandfs_sync;
+static const char *nandfs_opts[] = {
+ "snap", "from", "noatime", NULL
+};
+
+/* System nodes */
+/*
+ * Instantiate the four system inodes (DAT, cpfile, sufile, GC) from
+ * the super root and mark their vnodes VV_SYSTEM.  On any failure all
+ * four node pointers are disposed (NOTE(review): assumes
+ * nandfs_dispose_node tolerates a NULL/unset node — confirm).
+ * Returns 0 or an errno value.
+ */
+static int
+nandfs_create_system_nodes(struct nandfs_device *nandfsdev)
+{
+	int error;
+
+	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO,
+	    &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node);
+	if (error)
+		goto errorout;
+
+	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_CPFILE_INO,
+	    &nandfsdev->nd_super_root.sr_cpfile, &nandfsdev->nd_cp_node);
+	if (error)
+		goto errorout;
+
+	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_SUFILE_INO,
+	    &nandfsdev->nd_super_root.sr_sufile, &nandfsdev->nd_su_node);
+	if (error)
+		goto errorout;
+
+	/* The GC node has no on-media inode; it is purely in-core. */
+	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO,
+	    NULL, &nandfsdev->nd_gc_node);
+	if (error)
+		goto errorout;
+
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
+
+	DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n",
+	    NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node),
+	    NTOV(nandfsdev->nd_su_node)));
+	return (0);
+
+errorout:
+	nandfs_dispose_node(&nandfsdev->nd_gc_node);
+	nandfs_dispose_node(&nandfsdev->nd_dat_node);
+	nandfs_dispose_node(&nandfsdev->nd_cp_node);
+	nandfs_dispose_node(&nandfsdev->nd_su_node);
+
+	return (error);
+}
+
+/*
+ * Tear down the system vnodes (GC, DAT, cpfile, sufile) once the
+ * device has no remaining references; a no-op otherwise.
+ */
+static void
+nandfs_release_system_nodes(struct nandfs_device *nandfsdev)
+{
+
+	if (nandfsdev == NULL || nandfsdev->nd_refcnt > 0)
+		return;
+
+	if (nandfsdev->nd_gc_node)
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
+	if (nandfsdev->nd_dat_node)
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
+	if (nandfsdev->nd_cp_node)
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
+	if (nandfsdev->nd_su_node)
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
+}
+
+/*
+ * Verify the fsdata checksum.  Returns 1 when the magic and CRC are
+ * valid, 0 otherwise.  The stored CRC is computed with f_sum zeroed,
+ * so it is temporarily cleared and then restored.
+ */
+static int
+nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
+{
+	uint32_t stored, computed;
+
+	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
+		return (0);
+
+	stored = fsdata->f_sum;
+	fsdata->f_sum = 0;
+	computed = crc32((uint8_t *)fsdata, fsdata->f_bytes);
+	fsdata->f_sum = stored;
+
+	return (stored == computed);
+}
+
+/*
+ * Verify a superblock checksum.  Returns 1 when the magic and CRC are
+ * valid, 0 otherwise.  The CRC covers f_sbbytes bytes and is computed
+ * with s_sum zeroed, so it is temporarily cleared and then restored.
+ */
+static int
+nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
+    struct nandfs_super_block *super)
+{
+	uint32_t stored, computed;
+
+	if (super->s_magic != NANDFS_SUPER_MAGIC)
+		return (0);
+
+	stored = super->s_sum;
+	super->s_sum = 0;
+	computed = crc32((uint8_t *)super, fsdata->f_sbbytes);
+	super->s_sum = stored;
+
+	return (stored == computed);
+}
+
+/*
+ * Stamp a superblock with its checksum: the CRC covers f_sbbytes bytes
+ * and is computed with s_sum zeroed, then stored into s_sum.
+ */
+static void
+nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata,
+    struct nandfs_super_block *super)
+{
+
+	super->s_sum = 0;
+	super->s_sum = crc32((uint8_t *)super, fsdata->f_sbbytes);
+}
+
+/*
+ * Check whether a memory area is fully erased, i.e. every byte is 0xff.
+ * Returns 1 when empty, 0 as soon as a programmed byte is found.
+ */
+static int
+nandfs_is_empty(u_char *area, int size)
+{
+	int n;
+
+	for (n = 0; n < size; n++) {
+		if (area[n] != 0xff)
+			return (0);
+	}
+
+	return (1);
+}
+
+/* Super block slots per erase block, after the reserved fsdata prefix. */
+static __inline int
+nandfs_sblocks_in_esize(struct nandfs_device *fsdev)
+{
+
+	return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) /
+	    sizeof(struct nandfs_super_block));
+}
+
+/* Total super block slots across all NANDFS_NFSAREAS storage areas. */
+static __inline int
+nandfs_max_sblocks(struct nandfs_device *fsdev)
+{
+
+	return (NANDFS_NFSAREAS * nandfs_sblocks_in_esize(fsdev));
+}
+
+/* Super block slots per device block (sector). */
+static __inline int
+nandfs_sblocks_in_block(struct nandfs_device *fsdev)
+{
+
+	return (fsdev->nd_devblocksize / sizeof(struct nandfs_super_block));
+}
+
+/*
+ * Super block slots in the first device block of an area; the reserved
+ * NANDFS_SBLOCK_OFFSET_BYTES prefix eats into it, clamped at zero.
+ */
+static __inline int
+nandfs_sblocks_in_first_block(struct nandfs_device *fsdev)
+{
+	int n;
+
+	n = nandfs_sblocks_in_block(fsdev) -
+	    NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block);
+	if (n < 0)
+		n = 0;
+
+	return (n);
+}
+
+/*
+ * Write the in-core super block into one filesystem storage area (fstp).
+ *
+ * Slots inside the area's erase block are consumed sequentially; when the
+ * area wraps back to slot 0 the whole erase block is erased and the
+ * fsdata header rewritten first.  On success fstp->last_used is advanced
+ * to the slot just written.  Returns 0 or an errno from erase/bread/
+ * bwrite.
+ */
+static int
+nandfs_write_superblock_at(struct nandfs_device *fsdev,
+    struct nandfs_fsarea *fstp)
+{
+	struct nandfs_super_block *super, *supert;
+	struct buf *bp;
+	int sb_per_sector, sbs_in_fsd, read_block;
+	int index, pos, error;
+	off_t offset;
+
+	DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n",
+	    __func__, fstp->last_used, nandfs_sblocks_in_esize(fsdev)));
+	/* Pick the next slot, wrapping to 0 at the end of the erase block. */
+	if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1)
+		index = 0;
+	else
+		index = fstp->last_used + 1;
+
+	super = &fsdev->nd_super;
+	supert = NULL;
+
+	sb_per_sector = nandfs_sblocks_in_block(fsdev);
+	/* The fsdata header occupies the first sbs_in_fsd slot positions. */
+	sbs_in_fsd = sizeof(struct nandfs_fsdata) /
+	    sizeof(struct nandfs_super_block);
+	index += sbs_in_fsd;
+	offset = fstp->offset;
+
+	DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx "
+	    "s_last_seq %#jx wtime %jd index %d\n", __func__, offset,
+	    super->s_last_pseg, super->s_last_cno, super->s_last_seq,
+	    super->s_wtime, index));
+
+	/* Device block (in DEV_BSIZE units) that contains the target slot. */
+	read_block = btodb(offset + ((index / sb_per_sector) * sb_per_sector)
+	    * sizeof(struct nandfs_super_block));
+
+	DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block));
+
+	/* Wrapped around: erase the area and re-seed the fsdata header. */
+	if (index == sbs_in_fsd) {
+		error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize);
+		if (error)
+			return (error);
+
+		error = bread(fsdev->nd_devvp, btodb(offset),
+		    fsdev->nd_devblocksize, NOCRED, &bp);
+		if (error) {
+			printf("NANDFS: couldn't read initial data: %d\n",
+			    error);
+			brelse(bp);
+			return (error);
+		}
+		memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
+		/*
+		 * 0xff-out the rest. This bp could be cached, so potentially
+		 * b_data contains stale super blocks.
+		 *
+		 * We don't mind cached bp since most of the time we just add
+		 * super blocks to already 0xff-out b_data and don't need to
+		 * perform actual read.
+		 */
+		if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata))
+			memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff,
+			    fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata));
+		error = bwrite(bp);
+		if (error) {
+			printf("NANDFS: cannot rewrite initial data at %jx\n",
+			    offset);
+			return (error);
+		}
+	}
+
+	error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize,
+	    NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	supert = (struct nandfs_super_block *)(bp->b_data);
+	pos = index % sb_per_sector;
+
+	DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos));
+	memcpy(&supert[pos], super, sizeof(struct nandfs_super_block));
+
+	/*
+	 * See comment above in code that performs erase.
+	 */
+	if (pos == 0)
+		memset(&supert[1], 0xff,
+		    (sb_per_sector - 1) * sizeof(struct nandfs_super_block));
+
+	error = bwrite(bp);
+	if (error) {
+		printf("NANDFS: cannot update superblock at %jx\n", offset);
+		return (error);
+	}
+
+	DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__,
+	    fstp->last_used, index - sbs_in_fsd));
+	/* Record the slot just consumed (fsdata prefix excluded again). */
+	fstp->last_used = index - sbs_in_fsd;
+
+	return (0);
+}
+
+/*
+ * Write the in-core super block out to media.
+ *
+ * The NANDFS_NFSAREAS storage areas are tried round-robin, starting at
+ * nd_last_fsarea; an area whose write fails is marked
+ * NANDFS_FSSTOR_FAILED and skipped from then on.  On success
+ * nd_last_fsarea is advanced past the area written.  Returns 0, or the
+ * last write error when every area failed.
+ */
+int
+nandfs_write_superblock(struct nandfs_device *fsdev)
+{
+	struct nandfs_super_block *super;
+	struct timespec ts;
+	int error;
+	int i, j;
+
+	vfs_timestamp(&ts);
+
+	super = &fsdev->nd_super;
+
+	/* Refresh the fields tracking the running filesystem state. */
+	super->s_last_pseg = fsdev->nd_last_pseg;
+	super->s_last_cno = fsdev->nd_last_cno;
+	super->s_last_seq = fsdev->nd_seg_sequence;
+	super->s_wtime = ts.tv_sec;
+
+	nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super);
+
+	error = 0;
+	/*
+	 * Note the parenthesization: the increment was previously written
+	 * as "j = (j + 1 % NANDFS_NFSAREAS)", which never wraps j (since
+	 * "1 % NANDFS_NFSAREAS" is just 1) and could run j past the end of
+	 * nd_fsarea[] whenever nd_last_fsarea > 0.
+	 */
+	for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS;
+	    i++, j = (j + 1) % NANDFS_NFSAREAS) {
+		if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) {
+			DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j));
+			continue;
+		}
+		error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]);
+		if (error) {
+			printf("NANDFS: writing superblock at offset %d failed:"
+			    "%d\n", j * fsdev->nd_erasesize, error);
+			fsdev->nd_fsarea[j].flags |= NANDFS_FSSTOR_FAILED;
+		} else
+			break;
+	}
+
+	if (i == NANDFS_NFSAREAS) {
+		printf("NANDFS: superblock was not written\n");
+		/*
+		 * TODO: switch to read-only?
+		 */
+		return (error);
+	} else
+		fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS;
+
+	return (0);
+}
+
+/*
+ * Pick the first fsdata copy with a valid checksum out of the nfsds
+ * candidates read from media.  Returns 0 with *fsdata pointing into
+ * fsdatat[], or EINVAL when no copy checks out.
+ */
+static int
+nandfs_select_fsdata(struct nandfs_device *fsdev,
+    struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds)
+{
+	int idx;
+
+	*fsdata = NULL;
+	for (idx = 0; idx < nfsds; idx++) {
+		DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__,
+		    idx, fsdatat[idx].f_magic, fsdatat[idx].f_sum));
+		if (nandfs_check_fsdata_crc(&fsdatat[idx])) {
+			*fsdata = &fsdatat[idx];
+			break;
+		}
+	}
+
+	return (*fsdata == NULL ? EINVAL : 0);
+}
+
+/*
+ * Scan nsbs super block copies and select the CRC-valid one carrying the
+ * highest checkpoint number.  Returns 0 with *super set, or EINVAL when
+ * no copy passes the CRC check.
+ */
+static int
+nandfs_select_sb(struct nandfs_device *fsdev,
+    struct nandfs_super_block *supert, struct nandfs_super_block **super,
+    int nsbs)
+{
+	struct nandfs_super_block *best;
+	int idx;
+
+	best = NULL;
+	for (idx = 0; idx < nsbs; idx++) {
+		if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata,
+		    &supert[idx]))
+			continue;
+		DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x "
+		    "s_wtime %jd\n", __func__, idx, supert[idx].s_last_cno,
+		    supert[idx].s_magic, supert[idx].s_wtime));
+		/* Keep the candidate with the newest checkpoint number. */
+		if (best == NULL || supert[idx].s_last_cno > best->s_last_cno)
+			best = &supert[idx];
+	}
+
+	*super = best;
+	return (best == NULL ? EINVAL : 0);
+}
+
+/*
+ * Read one filesystem storage area (an erase block) from media: the
+ * fsdata header into *fsdata and every super block slot into the array
+ * at *super.  The erase block is read in MAXBSIZE pieces when needed.
+ * Finally the slots are scanned backwards for the last programmed one,
+ * which is recorded in fstp->last_used (-1 relative values are possible
+ * if the whole area is erased).  On I/O error the area is marked
+ * NANDFS_FSSTOR_FAILED.
+ */
+static int
+nandfs_read_structures_at(struct nandfs_device *fsdev,
+    struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata,
+    struct nandfs_super_block *super)
+{
+	struct nandfs_super_block *tsuper, *tsuperd;
+	struct buf *bp;
+	int error, read_size;
+	int i;
+	int offset;
+
+	offset = fstp->offset;
+
+	/* Read at most MAXBSIZE per bread(9) call. */
+	if (fsdev->nd_erasesize > MAXBSIZE)
+		read_size = MAXBSIZE;
+	else
+		read_size = fsdev->nd_erasesize;
+
+	error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp);
+	if (error) {
+		printf("couldn't read: %d\n", error);
+		brelse(bp);
+		fstp->flags |= NANDFS_FSSTOR_FAILED;
+		return (error);
+	}
+
+	tsuper = super;
+
+	/* First chunk: fsdata header followed by the first sb slots. */
+	memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata));
+	memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)),
+	    read_size - sizeof(struct nandfs_fsdata));
+	brelse(bp);
+
+	tsuper += (read_size - sizeof(struct nandfs_fsdata)) /
+	    sizeof(struct nandfs_super_block);
+
+	/* Remaining chunks of the erase block are all super block slots. */
+	for (i = 1; i < fsdev->nd_erasesize / read_size; i++) {
+		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
+		    read_size, NOCRED, &bp);
+		if (error) {
+			printf("couldn't read: %d\n", error);
+			brelse(bp);
+			fstp->flags |= NANDFS_FSSTOR_FAILED;
+			return (error);
+		}
+		memcpy(tsuper, bp->b_data, read_size);
+		tsuper += read_size / sizeof(struct nandfs_super_block);
+		brelse(bp);
+	}
+
+	/* Walk back over erased (all-0xff) slots to find the last used one. */
+	tsuper -= 1;
+	fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1;
+	for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) {
+		if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper)))
+			fstp->last_used--;
+		else
+			break;
+	}
+
+	DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used));
+
+	return (0);
+}
+
+/*
+ * Read every storage area from media and select the valid fsdata header
+ * and the newest valid super block into the device softc.  Returns 0 on
+ * success or EINVAL when nothing usable was found.
+ */
+static int
+nandfs_read_structures(struct nandfs_device *fsdev)
+{
+	struct nandfs_fsdata *fsdata, *fsdatat;
+	struct nandfs_super_block *sblocks, *ssblock;
+	int nsbs, nfsds, i;
+	int error = 0;
+	int nrsbs;
+
+	nfsds = NANDFS_NFSAREAS;
+	nsbs = nandfs_max_sblocks(fsdev);
+
+	/* Temporary arrays holding every candidate read from media. */
+	fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, M_NANDFSTEMP,
+	    M_WAITOK | M_ZERO);
+	sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP,
+	    M_WAITOK | M_ZERO);
+
+	nrsbs = 0;
+	for (i = 0; i < NANDFS_NFSAREAS; i++) {
+		fsdev->nd_fsarea[i].offset = i * fsdev->nd_erasesize;
+		error = nandfs_read_structures_at(fsdev, &fsdev->nd_fsarea[i],
+		    &fsdatat[i], sblocks + nrsbs);
+		if (error)
+			continue;
+		nrsbs += (fsdev->nd_fsarea[i].last_used + 1);
+		/*
+		 * NOTE(review): this keeps nd_last_fsarea pointing at the
+		 * area with the smaller last_used value -- presumably so new
+		 * writes go to the least-consumed area; confirm intent.
+		 */
+		if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used >
+		    fsdev->nd_fsarea[i].last_used)
+			fsdev->nd_last_fsarea = i;
+	}
+
+	if (nrsbs == 0) {
+		printf("nandfs: no valid superblocks found\n");
+		error = EINVAL;
+		goto out;
+	}
+
+	error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds);
+	if (error)
+		goto out;
+	memcpy(&fsdev->nd_fsdata, fsdata, sizeof(struct nandfs_fsdata));
+
+	error = nandfs_select_sb(fsdev, sblocks, &ssblock, nsbs);
+	if (error)
+		goto out;
+
+	memcpy(&fsdev->nd_super, ssblock, sizeof(struct nandfs_super_block));
+out:
+	free(fsdatat, M_NANDFSTEMP);
+	free(sblocks, M_NANDFSTEMP);
+
+	if (error == 0)
+		DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd "
+		    "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime,
+		    fsdev->nd_super.s_last_pseg));
+
+	return (error);
+}
+
+/*
+ * Tear down the base state of a device: flush remaining buffers on the
+ * device vnode and release the system vnodes.
+ */
+static void
+nandfs_unmount_base(struct nandfs_device *nandfsdev)
+{
+	int error;
+
+	if (nandfsdev == NULL)
+		return;
+
+	/*
+	 * Flush our buffers; when this fails during unmount there is not
+	 * much left to do, so report the error and carry on.
+	 */
+	error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0);
+	if (error != 0)
+		nandfs_error("%s(): error:%d when umounting FS\n",
+		    __func__, error);
+
+	/* Release the device's system nodes */
+	nandfs_release_system_nodes(nandfsdev);
+}
+
+/* Cache the current number of clean segments in the device softc. */
+static void
+nandfs_get_ncleanseg(struct nandfs_device *nandfsdev)
+{
+	struct nandfs_seg_stat stat;
+
+	nandfs_get_seg_stat(nandfsdev, &stat);
+	nandfsdev->nd_clean_segs = stat.nss_ncleansegs;
+	DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n",
+	    (uintmax_t)nandfsdev->nd_clean_segs));
+}
+
+
+/*
+ * Mount the base (per-device, mount-independent) filesystem state: read
+ * and validate the on-media structures, locate the super root, extract
+ * the running values and create the system vnodes.  Returns 0 or an
+ * errno; on failure after validation the base state is torn down again.
+ */
+static int
+nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp,
+    struct nandfs_args *args)
+{
+	uint32_t log_blocksize;
+	int error;
+
+	/* Flush out any old buffers remaining from a previous use. */
+	if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0)))
+		return (error);
+
+	error = nandfs_read_structures(nandfsdev);
+	if (error) {
+		printf("nandfs: could not get valid filesystem structures\n");
+		return (error);
+	}
+
+	/* Reject on-media revisions we do not understand. */
+	if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) {
+		printf("nandfs: unsupported file system revision: %d "
+		    "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level,
+		    NANDFS_CURRENT_REV);
+		return (EINVAL);
+	}
+
+	/* The filesystem must have been created for this erase size. */
+	if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) {
+		printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n",
+		    nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize);
+		return (EINVAL);
+	}
+
+	/* Get our blocksize (stored as log2(blocksize) - 10) */
+	log_blocksize = nandfsdev->nd_fsdata.f_log_block_size;
+	nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10);
+	DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__,
+	    nandfsdev->nd_blocksize));
+
+	DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__,
+	    (uintmax_t)nandfsdev->nd_super.s_last_cno));
+
+	/* Calculate dat structure parameters */
+	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt,
+	    nandfsdev->nd_fsdata.f_dat_entry_size);
+	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt,
+	    nandfsdev->nd_fsdata.f_inode_size);
+
+	/* Search for the super root and roll forward when needed */
+	if (nandfs_search_super_root(nandfsdev)) {
+		printf("Cannot find valid SuperRoot\n");
+		return (EINVAL);
+	}
+
+	nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state;
+	if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) {
+		printf("FS is seriously damaged, needs repairing\n");
+		printf("aborting mount\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * FS should be ok now. The superblock and the last segsum could be
+	 * updated from the repair so extract running values again.
+	 */
+	nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg;
+	nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq;
+	nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev,
+	    nandfsdev->nd_last_pseg);
+	nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev,
+	    nandfsdev->nd_last_segsum.ss_next);
+	nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create;
+	nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno;
+	nandfsdev->nd_fakevblk = 1;
+	nandfsdev->nd_last_ino = NANDFS_USER_INO;
+	DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n"
+	    "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx\n",
+	    __func__, (uintmax_t)nandfsdev->nd_last_pseg,
+	    (uintmax_t)nandfsdev->nd_last_cno,
+	    (uintmax_t)nandfsdev->nd_seg_sequence,
+	    (uintmax_t)nandfsdev->nd_seg_sequence,
+	    (uintmax_t)nandfsdev->nd_seg_num,
+	    (uintmax_t)nandfsdev->nd_next_seg_num));
+
+	DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n"));
+
+	/* Create system vnodes for DAT, CP and SEGSUM */
+	error = nandfs_create_system_nodes(nandfsdev);
+	if (error)
+		nandfs_unmount_base(nandfsdev);
+
+	nandfs_get_ncleanseg(nandfsdev);
+
+	return (error);
+}
+
+/*
+ * Drop one reference to the shared per-device state; when the last
+ * reference goes away, unmount the base state, close the GEOM consumer,
+ * release the device vnode and free all per-device resources.
+ */
+static void
+nandfs_unmount_device(struct nandfs_device *nandfsdev)
+{
+
+	/* Is there anything? */
+	if (nandfsdev == NULL)
+		return;
+
+	/* Remove the device only if we're the last reference */
+	nandfsdev->nd_refcnt--;
+	if (nandfsdev->nd_refcnt >= 1)
+		return;
+
+	/* The kthreads and deferred work must be gone by now. */
+	MPASS(nandfsdev->nd_syncer == NULL);
+	MPASS(nandfsdev->nd_cleaner == NULL);
+	MPASS(nandfsdev->nd_free_base == NULL);
+
+	/* Unmount our base */
+	nandfs_unmount_base(nandfsdev);
+
+	/* Remove from our device list */
+	SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device);
+
+	DROP_GIANT();
+	g_topology_lock();
+	g_vfs_close(nandfsdev->nd_gconsumer);
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	DPRINTF(VOLUMES, ("closing device\n"));
+
+	/* Clear our mount reference and release device node */
+	vrele(nandfsdev->nd_devvp);
+
+	dev_rel(nandfsdev->nd_devvp->v_rdev);
+
+	/* Free our device info */
+	cv_destroy(&nandfsdev->nd_sync_cv);
+	mtx_destroy(&nandfsdev->nd_sync_mtx);
+	cv_destroy(&nandfsdev->nd_clean_cv);
+	mtx_destroy(&nandfsdev->nd_clean_mtx);
+	mtx_destroy(&nandfsdev->nd_mutex);
+	lockdestroy(&nandfsdev->nd_seg_const);
+	free(nandfsdev, M_NANDFSMNT);
+}
+
+/*
+ * Check whether a new mount of this device is compatible with the mounts
+ * already present: no double mount of the same checkpoint (EBUSY), at
+ * most one read/write mount (EROFS), and no R/W mount while the latest
+ * checkpoint is mounted read-only (EBUSY).  Returns 0 when the mount may
+ * proceed.
+ */
+static int
+nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp,
+    struct nandfs_args *args)
+{
+	struct nandfsmount *nmp;
+	uint64_t last_cno;
+
+	/* no double-mounting of the same checkpoint */
+	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
+		if (nmp->nm_mount_args.cpno == args->cpno)
+			return (EBUSY);
+	}
+
+	/* Allow readonly mounts without questioning here */
+	if (mp->mnt_flag & MNT_RDONLY)
+		return (0);
+
+	/* Read/write mount */
+	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
+		/* Only one RW mount on this device! */
+		if ((nmp->nm_vfs_mountp->mnt_flag & MNT_RDONLY)==0)
+			return (EROFS);
+		/* RDONLY on last mountpoint is device busy */
+		last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
+		if (nmp->nm_mount_args.cpno == last_cno)
+			return (EBUSY);
+	}
+
+	/* OK for now */
+	return (0);
+}
+
+/*
+ * Attach to the underlying device.  A device already known in
+ * nandfs_devices just gains a reference (and write access for R/W
+ * mounts).  Otherwise the GEOM consumer is opened, the NAND erase size
+ * queried (falling back to NANDFS_DEF_ERASESIZE when the provider is not
+ * NAND), the per-device softc allocated and the base state mounted.  On
+ * success *nandfsdev_p points at the (possibly shared) device state.
+ */
+static int
+nandfs_mount_device(struct vnode *devvp, struct mount *mp,
+    struct nandfs_args *args, struct nandfs_device **nandfsdev_p)
+{
+	struct nandfs_device *nandfsdev;
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	struct cdev *dev;
+	uint32_t erasesize;
+	int error, size;
+	int ronly;
+
+	DPRINTF(VOLUMES, ("Mounting NANDFS device\n"));
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+	/* Look up device in our nandfs_mountpoints */
+	*nandfsdev_p = NULL;
+	SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device)
+		if (nandfsdev->nd_devvp == devvp)
+			break;
+
+	/* Device already mounted: share its state after compat checks. */
+	if (nandfsdev) {
+		DPRINTF(VOLUMES, ("device already mounted\n"));
+		error = nandfs_check_mounts(nandfsdev, mp, args);
+		if (error)
+			return error;
+		nandfsdev->nd_refcnt++;
+		*nandfsdev_p = nandfsdev;
+
+		/* R/W mounts additionally need write access on the consumer. */
+		if (!ronly) {
+			DROP_GIANT();
+			g_topology_lock();
+			error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+		}
+		return (error);
+	}
+
+	/* First mount of this device: open it through GEOM. */
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+	dev = devvp->v_rdev;
+	dev_ref(dev);
+	DROP_GIANT();
+	g_topology_lock();
+	error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 0 : 1);
+	pp = g_dev_getprovider(dev);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	VOP_UNLOCK(devvp, 0);
+	if (error) {
+		dev_rel(dev);
+		return (error);
+	}
+
+	nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT, M_WAITOK | M_ZERO);
+
+	/* Initialise */
+	nandfsdev->nd_refcnt = 1;
+	nandfsdev->nd_devvp = devvp;
+	nandfsdev->nd_syncing = 0;
+	nandfsdev->nd_cleaning = 0;
+	nandfsdev->nd_gconsumer = cp;
+	cv_init(&nandfsdev->nd_sync_cv, "nandfssync");
+	mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF);
+	cv_init(&nandfsdev->nd_clean_cv, "nandfsclean");
+	mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF);
+	mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF);
+	lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT,
+	    LK_CANRECURSE);
+	STAILQ_INIT(&nandfsdev->nd_mounts);
+
+	nandfsdev->nd_devsize = pp->mediasize;
+	nandfsdev->nd_devblocksize = pp->sectorsize;
+
+	/* Ask the provider for the NAND erase block size. */
+	size = sizeof(erasesize);
+	error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size,
+	    &erasesize);
+	if (error) {
+		DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error));
+
+		if (error == ENOIOCTL || error == EOPNOTSUPP) {
+			/*
+			 * We conclude that this is not NAND storage
+			 */
+			nandfsdev->nd_erasesize = NANDFS_DEF_ERASESIZE;
+			nandfsdev->nd_is_nand = 0;
+		} else {
+			/* Hard failure: undo the GEOM open and bail out. */
+			DROP_GIANT();
+			g_topology_lock();
+			g_vfs_close(nandfsdev->nd_gconsumer);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			dev_rel(dev);
+			free(nandfsdev, M_NANDFSMNT);
+			return (error);
+		}
+	} else {
+		nandfsdev->nd_erasesize = erasesize;
+		nandfsdev->nd_is_nand = 1;
+	}
+
+	DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__,
+	    nandfsdev->nd_erasesize));
+
+	/* Register nandfs_device in list */
+	SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device);
+
+	error = nandfs_mount_base(nandfsdev, mp, args);
+	if (error) {
+		/* Remove all our information */
+		nandfs_unmount_device(nandfsdev);
+		return (EINVAL);
+	}
+
+	nandfsdev->nd_maxfilesize = nandfs_get_maxfilesize(nandfsdev);
+
+	*nandfsdev_p = nandfsdev;
+	DPRINTF(VOLUMES, ("NANDFS device mounted ok\n"));
+
+	return (0);
+}
+
+/*
+ * Mount a specific checkpoint: look it up in the cp file, validate it
+ * and instantiate the ifile node describing its inode space.  A cpno of
+ * 0 selects the latest checkpoint; any other checkpoint must be a
+ * snapshot.  Returns 0 or an errno.
+ */
+static int
+nandfs_mount_checkpoint(struct nandfsmount *nmp)
+{
+	struct nandfs_cpfile_header *cphdr;
+	struct nandfs_checkpoint *cp;
+	struct nandfs_inode ifile_inode;
+	struct nandfs_node *cp_node;
+	struct buf *bp;
+	uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno;
+	uint32_t off, dlen;
+	int cp_per_block, error;
+
+	cpno = nmp->nm_mount_args.cpno;
+	if (cpno == 0)
+		cpno = nmp->nm_nandfsdev->nd_super.s_last_cno;
+
+	DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n",
+	    __func__, cpno));
+
+	cp_node = nmp->nm_nandfsdev->nd_cp_node;
+
+	VOP_LOCK(NTOV(cp_node), LK_SHARED);
+	/* Get cpfile header from 1st block of cp file */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		return (error);
+	}
+
+	cphdr = (struct nandfs_cpfile_header *) bp->b_data;
+	ncp = cphdr->ch_ncheckpoints;
+	nsn = cphdr->ch_nsnapshots;
+
+	brelse(bp);
+
+	DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n"));
+	DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp));
+	DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn));
+
+	/* Read in our specified checkpoint */
+	dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size;
+	cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen;
+
+	/* Translate the checkpoint number into a cp-file block and offset. */
+	fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1;
+	blocknr = fcpno / cp_per_block;
+	off = (fcpno % cp_per_block) * dlen;
+	error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		printf("mount_nandfs: couldn't read cp block %"PRIu64"\n",
+		    fcpno);
+		return (EINVAL);
+	}
+
+	/* Needs to be a valid checkpoint */
+	cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off);
+	if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		printf("mount_nandfs: checkpoint marked invalid\n");
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		return (EINVAL);
+	}
+
+	/* Is this really the checkpoint we want? */
+	if (cp->cp_cno != cpno) {
+		printf("mount_nandfs: checkpoint file corrupt? "
+		    "expected cpno %"PRIu64", found cpno %"PRIu64"\n",
+		    cpno, cp->cp_cno);
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		return (EINVAL);
+	}
+
+	/* Check if it's a snapshot ! */
+	last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
+	if (cpno != last_cno) {
+		/* Only allow snapshots if not mounting on the last cp */
+		if ((cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) {
+			printf( "mount_nandfs: checkpoint %"PRIu64" is not a "
+			    "snapshot\n", cpno);
+			brelse(bp);
+			VOP_UNLOCK(NTOV(cp_node), 0);
+			return (EINVAL);
+		}
+	}
+
+	ifile_inode = cp->cp_ifile_inode;
+	brelse(bp);
+
+	/* Get ifile inode */
+	error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO,
+	    &ifile_inode, &nmp->nm_ifile_node);
+	if (error) {
+		printf("mount_nandfs: can't read ifile node\n");
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		return (EINVAL);
+	}
+
+	NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
+	VOP_UNLOCK(NTOV(cp_node), 0);
+	/* Get root node? */
+
+	return (0);
+}
+
+/* Release the per-mount information hanging off a mount point. */
+static void
+free_nandfs_mountinfo(struct mount *mp)
+{
+	struct nandfsmount *nmp;
+
+	nmp = VFSTONANDFS(mp);
+	if (nmp != NULL)
+		free(nmp, M_NANDFSMNT);
+}
+
+/*
+ * Wake the syncer thread and wait for it to complete one pass.  'reason'
+ * indexes the debug strings below; SYNCER_UMOUNT additionally tells the
+ * syncer to exit after the pass.  If a pass is already in flight we wait
+ * for it to finish before queueing ours.
+ */
+void
+nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason)
+{
+	char *reasons[] = {
+	    "umount",
+	    "vfssync",
+	    "bdflush",
+	    "fforce",
+	    "fsync",
+	    "ro_upd"
+	};
+
+	DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason]));
+	mtx_lock(&nffsdev->nd_sync_mtx);
+	/* Let any pass already in progress drain first. */
+	if (nffsdev->nd_syncing)
+		cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
+	if (reason == SYNCER_UMOUNT)
+		nffsdev->nd_syncer_exit = 1;
+	nffsdev->nd_syncing = 1;
+	wakeup(&nffsdev->nd_syncing);
+	/* Wait until nandfs_gc_finished() signals completion. */
+	cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
+
+	mtx_unlock(&nffsdev->nd_sync_mtx);
+}
+
+/*
+ * Mark the current syncer pass finished and wake all waiters; unless
+ * 'exit' is set, sleep until the next wakeup or until the periodic sync
+ * interval expires.
+ */
+static void
+nandfs_gc_finished(struct nandfs_device *nffsdev, int exit)
+{
+	int error;
+
+	mtx_lock(&nffsdev->nd_sync_mtx);
+	nffsdev->nd_syncing = 0;
+	DPRINTF(SYNC, ("%s: cleaner finish\n", __func__));
+	cv_broadcast(&nffsdev->nd_sync_cv);
+	mtx_unlock(&nffsdev->nd_sync_mtx);
+	if (!exit) {
+		/* Periodic wakeup; an explicit wakeup() cuts the sleep short. */
+		error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-",
+		    hz * nandfs_sync_interval);
+		DPRINTF(SYNC, ("%s: cleaner waked up: %d\n",
+		    __func__, error));
+	}
+}
+
+/*
+ * Per-device syncer kthread: runs the segment constructor periodically
+ * (or on demand via nandfs_wakeup_wait_sync()) until told to exit, then
+ * performs one final forced pass and terminates.
+ */
+static void
+nandfs_syncer(struct nandfsmount *nmp)
+{
+	struct nandfs_device *nffsdev;
+	struct mount *mp;
+	int flags, error;
+
+	mp = nmp->nm_vfs_mountp;
+	nffsdev = nmp->nm_nandfsdev;
+	/* Initial delay before the first pass. */
+	tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval);
+
+	while (!nffsdev->nd_syncer_exit) {
+		DPRINTF(SYNC, ("%s: syncer run\n", __func__));
+		nffsdev->nd_syncing = 1;
+
+		flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT));
+
+		error = nandfs_segment_constructor(nmp, flags);
+		if (error)
+			nandfs_error("%s: error:%d when creating segments\n",
+			    __func__, error);
+
+		nmp->nm_flags &= ~flags;
+
+		nandfs_gc_finished(nffsdev, 0);
+	}
+
+	/* Final forced pass on the way out. */
+	MPASS(nffsdev->nd_cleaner == NULL);
+	error = nandfs_segment_constructor(nmp,
+	    NANDFS_FORCE_SYNCER | NANDFS_UMOUNT);
+	if (error)
+		nandfs_error("%s: error:%d when creating segments\n",
+		    __func__, error);
+	nandfs_gc_finished(nffsdev, 1);
+	nffsdev->nd_syncer = NULL;
+	MPASS(nffsdev->nd_free_base == NULL);
+
+	DPRINTF(SYNC, ("%s: exiting\n", __func__));
+	kthread_exit();
+}
+
+/*
+ * Create the per-device syncer kthread.  Returns 0 or the kthread_add(9)
+ * error.
+ */
+static int
+start_syncer(struct nandfsmount *nmp)
+{
+	int error;
+
+	MPASS(nmp->nm_nandfsdev->nd_syncer == NULL);
+
+	DPRINTF(SYNC, ("%s: start syncer\n", __func__));
+
+	nmp->nm_nandfsdev->nd_syncer_exit = 0;
+
+	error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL,
+	    &nmp->nm_nandfsdev->nd_syncer, 0, 0, "nandfs_syncer");
+
+	if (error)
+		printf("nandfs: could not start syncer: %d\n", error);
+
+	return (error);
+}
+
+/*
+ * Ask the syncer kthread to exit and wait for its final pass.
+ */
+static int
+stop_syncer(struct nandfsmount *nmp)
+{
+
+	MPASS(nmp->nm_nandfsdev->nd_syncer != NULL);
+
+	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_UMOUNT);
+
+	DPRINTF(SYNC, ("%s: stop syncer\n", __func__));
+	return (0);
+}
+
+/*
+ * VFS_MOUNT(9) entry point: handle fresh mounts and MNT_UPDATE
+ * transitions (export updates, read/write <-> read-only).
+ */
+static int
+nandfs_mount(struct mount *mp)
+{
+	struct nandfsmount *nmp;
+	struct vnode *devvp;
+	struct nameidata nd;
+	struct vfsoptlist *opts;
+	struct thread *td;
+	char *from;
+	int error = 0, flags;
+
+	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
+
+	td = curthread;
+	opts = mp->mnt_optnew;
+
+	if (vfs_filteropt(opts, nandfs_opts))
+		return (EINVAL);
+
+	/*
+	 * Handle MNT_UPDATE: export changes are a no-op, otherwise switch
+	 * between read-only and read/write.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONANDFS(mp);
+		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
+			return (error);
+		}
+		/* Downgrade R/W -> RO: sync, flush and stop the kthreads. */
+		if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0)) {
+			vn_start_write(NULL, &mp, V_WAIT);
+			error = VFS_SYNC(mp, MNT_WAIT);
+			if (error)
+				return (error);
+			vn_finished_write(mp);
+
+			flags = WRITECLOSE;
+			if (mp->mnt_flag & MNT_FORCE)
+				flags |= FORCECLOSE;
+
+			nandfs_wakeup_wait_sync(nmp->nm_nandfsdev,
+			    SYNCER_ROUPD);
+			error = vflush(mp, 0, flags, td);
+			if (error)
+				return (error);
+
+			nandfs_stop_cleaner(nmp->nm_nandfsdev);
+			stop_syncer(nmp);
+			DROP_GIANT();
+			g_topology_lock();
+			g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			MNT_ILOCK(mp);
+			mp->mnt_flag |= MNT_RDONLY;
+			MNT_IUNLOCK(mp);
+			nmp->nm_ronly = 1;
+
+		} else if ((nmp->nm_ronly) &&
+		    !vfs_flagopt(opts, "ro", NULL, 0)) {
+			/*
+			 * Don't allow read-write snapshots.
+			 */
+			if (nmp->nm_mount_args.cpno != 0)
+				return (EROFS);
+			/*
+			 * If upgrade to read-write by non-root, then verify
+			 * that user has necessary permissions on the device.
+			 */
+			devvp = nmp->nm_nandfsdev->nd_devvp;
+			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+			error = VOP_ACCESS(devvp, VREAD | VWRITE,
+			    td->td_ucred, td);
+			if (error) {
+				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+				if (error) {
+					VOP_UNLOCK(devvp, 0);
+					return (error);
+				}
+			}
+
+			VOP_UNLOCK(devvp, 0);
+			DROP_GIANT();
+			g_topology_lock();
+			error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1,
+			    0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			if (error)
+				return (error);
+
+			MNT_ILOCK(mp);
+			mp->mnt_flag &= ~MNT_RDONLY;
+			MNT_IUNLOCK(mp);
+			/* Bring up the syncer and cleaner for R/W operation. */
+			error = start_syncer(nmp);
+			if (error == 0)
+				error = nandfs_start_cleaner(nmp->nm_nandfsdev);
+			if (error) {
+				DROP_GIANT();
+				g_topology_lock();
+				g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1,
+				    0);
+				g_topology_unlock();
+				PICKUP_GIANT();
+				return (error);
+			}
+
+			nmp->nm_ronly = 0;
+		}
+		return (0);
+	}
+
+	from = vfs_getopts(opts, "from", &error);
+	if (error)
+		return (error);
+
+	/*
+	 * Find device node
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+
+	devvp = nd.ni_vp;
+
+	if (!vn_isdisk(devvp, &error)) {
+		vput(devvp);
+		return (error);
+	}
+
+	/* Check the access rights on the mount device */
+	error = VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread);
+	if (error)
+		error = priv_check(curthread, PRIV_VFS_MOUNT_PERM);
+	if (error) {
+		vput(devvp);
+		return (error);
+	}
+
+	vfs_getnewfsid(mp);
+
+	error = nandfs_mountfs(devvp, mp);
+	if (error)
+		return (error);
+	vfs_mountedfrom(mp, from);
+
+	return (0);
+}
+
+/*
+ * Common mount worker: parse the remaining options, attach to (or
+ * create) the shared device state, hook up the per-mount structure and
+ * mount the requested checkpoint.  Called with devvp locked; unlocks it.
+ * A non-zero "snap" checkpoint implies a read-only mount.
+ */
+static int
+nandfs_mountfs(struct vnode *devvp, struct mount *mp)
+{
+	struct nandfsmount *nmp = NULL;
+	struct nandfs_args *args = NULL;
+	struct nandfs_device *nandfsdev;
+	char *from;
+	int error, ronly;
+	char *cpno;
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+	if (devvp->v_rdev->si_iosize_max != 0)
+		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+	VOP_UNLOCK(devvp, 0);
+
+	if (mp->mnt_iosize_max > MAXPHYS)
+		mp->mnt_iosize_max = MAXPHYS;
+
+	from = vfs_getopts(mp->mnt_optnew, "from", &error);
+	if (error)
+		goto error;
+
+	/* "snap" selects the checkpoint (snapshot) number to mount. */
+	error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL);
+	if (error == ENOENT)
+		cpno = NULL;
+	else if (error)
+		goto error;
+
+	args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args),
+	    M_NANDFSMNT, M_WAITOK | M_ZERO);
+
+	if (cpno != NULL)
+		args->cpno = strtoul(cpno, (char **)NULL, 10);
+	else
+		args->cpno = 0;
+	args->fspec = from;
+
+	/* Snapshot mounts must be read-only. */
+	if (args->cpno != 0 && !ronly) {
+		error = EROFS;
+		goto error;
+	}
+
+	printf("WARNING: NANDFS is considered to be a highly experimental "
+	    "feature in FreeBSD.\n");
+
+	error = nandfs_mount_device(devvp, mp, args, &nandfsdev);
+	if (error)
+		goto error;
+
+	nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount),
+	    M_NANDFSMNT, M_WAITOK | M_ZERO);
+
+	mp->mnt_data = nmp;
+	nmp->nm_vfs_mountp = mp;
+	nmp->nm_ronly = ronly;
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
+	nmp->nm_nandfsdev = nandfsdev;
+	/* Add our mountpoint */
+	STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount);
+
+	/* Clamp a too-new checkpoint number to the last one known. */
+	if (args->cpno > nandfsdev->nd_last_cno) {
+		printf("WARNING: supplied checkpoint number (%jd) is greater "
+		    "than last known checkpoint on filesystem (%jd). Mounting"
+		    " checkpoint %jd\n", (uintmax_t)args->cpno,
+		    (uintmax_t)nandfsdev->nd_last_cno,
+		    (uintmax_t)nandfsdev->nd_last_cno);
+		args->cpno = nandfsdev->nd_last_cno;
+	}
+
+	/* Setting up other parameters */
+	nmp->nm_mount_args = *args;
+	free(args, M_NANDFSMNT);
+	error = nandfs_mount_checkpoint(nmp);
+	if (error) {
+		nandfs_unmount(mp, MNT_FORCE);
+		goto unmounted;
+	}
+
+	/* R/W mounts need the syncer and cleaner kthreads running. */
+	if (!ronly) {
+		error = start_syncer(nmp);
+		if (error == 0)
+			error = nandfs_start_cleaner(nmp->nm_nandfsdev);
+		if (error)
+			nandfs_unmount(mp, MNT_FORCE);
+	}
+
+	return (0);
+
+error:
+	if (args != NULL)
+		free(args, M_NANDFSMNT);
+
+	if (nmp != NULL) {
+		free(nmp, M_NANDFSMNT);
+		mp->mnt_data = NULL;
+	}
+unmounted:
+	return (error);
+}
+
+/*
+ * VFS_UNMOUNT(9): flush vnodes, stop the kthreads for R/W mounts,
+ * detach this mount from the shared device state and drop the device
+ * reference (closing the device when we were the last mount).
+ */
+static int
+nandfs_unmount(struct mount *mp, int mntflags)
+{
+	struct nandfs_device *nandfsdev;
+	struct nandfsmount *nmp;
+	int error;
+	int flags = 0;
+
+	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	nmp = mp->mnt_data;
+	nandfsdev = nmp->nm_nandfsdev;
+
+	error = vflush(mp, 0, flags | SKIPSYSTEM, curthread);
+	if (error)
+		return (error);
+
+	if (!(nmp->nm_ronly)) {
+		nandfs_stop_cleaner(nandfsdev);
+		stop_syncer(nmp);
+	}
+
+	if (nmp->nm_ifile_node)
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
+
+	/* Remove our mount point */
+	STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount);
+
+	/* Unmount the device itself when we're the last one */
+	nandfs_unmount_device(nandfsdev);
+
+	free_nandfs_mountinfo(mp);
+
+	/*
+	 * Finally, detach the per-mount data from the mount point.
+	 */
+	mp->mnt_data = 0;
+	MNT_ILOCK(mp);
+	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
+
+	return (0);
+}
+
+/*
+ * VFS_STATFS(9): fill in filesystem statistics.  The used-inode count is
+ * derived from the block group descriptors in the first ifile block.
+ */
+static int
+nandfs_statfs(struct mount *mp, struct statfs *sbp)
+{
+	struct nandfsmount *nmp;
+	struct nandfs_device *nandfsdev;
+	struct nandfs_fsdata *fsdata;
+	struct nandfs_super_block *sb;
+	struct nandfs_block_group_desc *groups;
+	struct nandfs_node *ifile;
+	struct nandfs_mdt *mdt;
+	struct buf *bp;
+	int i, error;
+	uint32_t entries_per_group;
+	uint64_t files = 0;
+
+	nmp = mp->mnt_data;
+	nandfsdev = nmp->nm_nandfsdev;
+	fsdata = &nandfsdev->nd_fsdata;
+	sb = &nandfsdev->nd_super;
+	ifile = nmp->nm_ifile_node;
+	mdt = &nandfsdev->nd_ifile_mdt;
+	entries_per_group = mdt->entries_per_group;
+
+	VOP_LOCK(NTOV(ifile), LK_SHARED);
+	error = nandfs_bread(ifile, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(ifile), 0);
+		return (error);
+	}
+
+	groups = (struct nandfs_block_group_desc *)bp->b_data;
+
+	/*
+	 * NOTE(review): only the descriptors in the first ifile block are
+	 * scanned here -- presumably sufficient for the common case; confirm
+	 * whether further descriptor blocks can exist and should be counted.
+	 */
+	for (i = 0; i < mdt->groups_per_desc_block; i++)
+		files += (entries_per_group - groups[i].bg_nfrees);
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(ifile), 0);
+
+	sbp->f_bsize = nandfsdev->nd_blocksize;
+	sbp->f_iosize = sbp->f_bsize;
+	sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments;
+	sbp->f_bfree = sb->s_free_blocks_count;
+	sbp->f_bavail = sbp->f_bfree;
+	sbp->f_files = files;
+	sbp->f_ffree = 0;
+	return (0);
+}
+
+/*
+ * VFS_ROOT(9): return the root vnode of this mount.
+ */
+static int
+nandfs_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+	struct nandfsmount *nmp;
+	struct nandfs_node *node;
+	int error;
+
+	nmp = VFSTONANDFS(mp);
+	error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &node);
+	if (error != 0)
+		return (error);
+
+	KASSERT(NTOV(node)->v_vflag & VV_ROOT,
+	    ("root_vp->v_vflag & VV_ROOT"));
+
+	*vpp = NTOV(node);
+	return (error);
+}
+
+/*
+ * VFS_VGET(9): look up the vnode for inode number 'ino'.
+ */
+static int
+nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
+{
+	struct nandfs_node *node;
+	int error;
+
+	error = nandfs_get_node(VFSTONANDFS(mp), ino, &node);
+	if (node != NULL)
+		*vpp = NTOV(node);
+
+	return (error);
+}
+
+/*
+ * VFS_SYNC(9): kick the syncer thread and wait for it to complete one
+ * pass.  Lazy and suspend syncs are ignored.
+ */
+static int
+nandfs_sync(struct mount *mp, int waitfor)
+{
+	struct nandfsmount *nmp;
+
+	nmp = VFSTONANDFS(mp);
+	DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor));
+
+	/*
+	 * XXX: A hack to be removed soon
+	 */
+	if (waitfor == MNT_LAZY || waitfor == MNT_SUSPEND)
+		return (0);
+
+	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_VFS_SYNC);
+	return (0);
+}
+
+/* VFS operations vector; registered below via VFS_SET(9). */
+static struct vfsops nandfs_vfsops = {
+	.vfs_init =		nandfs_init,
+	.vfs_mount =		nandfs_mount,
+	.vfs_root =		nandfs_root,
+	.vfs_statfs =		nandfs_statfs,
+	.vfs_uninit =		nandfs_uninit,
+	.vfs_unmount =		nandfs_unmount,
+	.vfs_vget =		nandfs_vget,
+	.vfs_sync =		nandfs_sync,
+};
+
+VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nandfs/nandfs_vnops.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/fs/nandfs/nandfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,2455 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_vnops.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/lockf.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/fcntl.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+#include <sys/priv.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+extern uma_zone_t nandfs_node_zone;
+static void nandfs_read_filebuf(struct nandfs_node *, struct buf *);
+static void nandfs_itimes_locked(struct vnode *);
+static int nandfs_truncate(struct vnode *, uint64_t);
+
+static vop_pathconf_t nandfs_pathconf;
+
+#define UPDATE_CLOSE 0
+#define UPDATE_WAIT 0
+
+/*
+ * VOP_INACTIVE(9): the last use reference on the vnode was dropped.
+ * If the node represents a fully unlinked file, free its blocks,
+ * destroy the on-media node and let VFS recycle the vnode.
+ */
+static int
+nandfs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	int error = 0;
+
+	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
+
+	if (node == NULL) {
+		DPRINTF(NODE, ("%s: inactive NULL node\n", __func__));
+		return (0);
+	}
+
+	/*
+	 * A non-zero i_mode marks an initialized inode; a zero link count
+	 * then means the file was deleted and its storage can go now.
+	 */
+	if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) {
+		nandfs_truncate(vp, 0);
+		error = nandfs_node_destroy(node);
+		if (error)
+			nandfs_error("%s: destroy node: %p\n", __func__, node);
+		node->nn_flags = 0;
+		vrecycle(vp);
+	}
+
+	return (error);
+}
+
+/*
+ * VOP_RECLAIM(9): detach the nandfs node from its vnode so the vnode
+ * can be reused.  The node structure itself is freed here, so the
+ * inode number is saved first for the unlock decision at the end.
+ */
+static int
+nandfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *nandfs_node = VTON(vp);
+	struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev;
+	uint64_t ino = nandfs_node->nn_ino;
+
+	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node));
+
+	/* Invalidate all entries to a particular vnode. */
+	cache_purge(vp);
+
+	/* Destroy the vm object and flush associated pages. */
+	vnode_destroy_vobject(vp);
+
+	/* Remove from vfs hash if not system vnode */
+	if (!NANDFS_SYS_NODE(nandfs_node->nn_ino))
+		vfs_hash_remove(vp);
+
+	/* Dispose all node knowledge */
+	nandfs_dispose_node(&nandfs_node);
+
+	/*
+	 * NOTE(review): presumably the device write lock was taken when a
+	 * non-system node was set up -- confirm against nandfs_get_node().
+	 * 'ino' was copied above because the node is already freed here.
+	 */
+	if (!NANDFS_SYS_NODE(ino))
+		NANDFS_WRITEUNLOCK(fsdev);
+
+	return (0);
+}
+
+/*
+ * VOP_READ(9): copy file data to the caller one filesystem block at a
+ * time, going through the buffer cache via nandfs_bread().
+ */
+static int
+nandfs_read(struct vop_read_args *ap)
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nandfs_node *node = VTON(vp);
+	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
+	struct uio *uio = ap->a_uio;
+	struct buf *bp;
+	uint64_t size;
+	uint32_t blocksize;
+	off_t bytesinfile;
+	ssize_t toread, off;
+	daddr_t lbn;
+	ssize_t resid;
+	int error = 0;
+
+	/* Zero-length requests and reads at or past EOF return no data. */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	size = node->nn_inode.i_size;
+	if (uio->uio_offset >= size)
+		return (0);
+
+	blocksize = nandfsdev->nd_blocksize;
+	bytesinfile = size - uio->uio_offset;
+
+	/* Clamp the transfer to what is actually left in the file. */
+	resid = omin(uio->uio_resid, bytesinfile);
+
+	while (resid) {
+		/* Logical block number and byte offset within it. */
+		lbn = uio->uio_offset / blocksize;
+		off = uio->uio_offset & (blocksize - 1);
+
+		toread = omin(resid, blocksize - off);
+
+		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
+		    (uintmax_t)lbn, toread, blocksize));
+
+		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			break;
+		}
+
+		error = uiomove(bp->b_data + off, toread, uio);
+		if (error) {
+			brelse(bp);
+			break;
+		}
+
+		brelse(bp);
+		resid -= toread;
+	}
+
+	return (error);
+}
+
+/*
+ * VOP_WRITE(9): write uio data into the file through the buffer cache,
+ * creating blocks as needed, and extend the inode on growth.
+ *
+ * Fixes over the previous revision:
+ *  - the buffer was leaked when uiomove() failed; it is released now.
+ *  - the IO_APPEND offset adjustment was performed twice; the second,
+ *    redundant copy was removed (the VREG case already does it).
+ */
+static int
+nandfs_write(struct vop_write_args *ap)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_node *node;
+	struct vnode *vp;
+	struct uio *uio;
+	struct buf *bp;
+	uint64_t file_size, vblk;
+	uint32_t blocksize;
+	ssize_t towrite, off;
+	daddr_t lbn;
+	ssize_t resid;
+	int error, ioflag, modified;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	ioflag = ap->a_ioflag;
+	node = VTON(vp);
+	fsdev = node->nn_nandfsdev;
+
+	/* Refuse writes once the device is out of space. */
+	if (nandfs_fs_full(fsdev))
+		return (ENOSPC);
+
+	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
+	    uio->uio_resid, (uintmax_t)uio->uio_offset));
+
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_resid == 0)
+		return (0);
+
+	blocksize = fsdev->nd_blocksize;
+	file_size = node->nn_inode.i_size;
+
+	switch (vp->v_type) {
+	case VREG:
+		/* O_APPEND: every write starts at the current EOF. */
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = file_size;
+		break;
+	case VDIR:
+		return (EISDIR);
+	case VLNK:
+		break;
+	default:
+		panic("%s: bad file type vp: %p", __func__, vp);
+	}
+
+	resid = uio->uio_resid;
+	modified = error = 0;
+
+	while (uio->uio_resid) {
+		/* Logical block number and byte offset within it. */
+		lbn = uio->uio_offset / blocksize;
+		off = uio->uio_offset & (blocksize - 1);
+
+		towrite = omin(uio->uio_resid, blocksize - off);
+
+		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n",
+		    __func__, (uintmax_t)lbn, towrite, blocksize));
+
+		/* Decide whether the block is mapped or must be created. */
+		error = nandfs_bmap_lookup(node, lbn, &vblk);
+		if (error)
+			break;
+
+		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) "
+		    "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize,
+		    vblk));
+
+		if (vblk != 0)
+			error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
+		else
+			error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp);
+
+		DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__,
+		    vp, bp, (uintmax_t)lbn));
+		if (error) {
+			if (bp)
+				brelse(bp);
+			break;
+		}
+
+		error = uiomove((char *)bp->b_data + off, (int)towrite, uio);
+		if (error) {
+			/* Was leaked on this path before; release it. */
+			brelse(bp);
+			break;
+		}
+
+		/*
+		 * NOTE(review): assumes nandfs_dirty_buf() disposes of the
+		 * buffer itself on failure -- confirm against its
+		 * implementation.
+		 */
+		error = nandfs_dirty_buf(bp, 0);
+		if (error)
+			break;
+
+		modified++;
+	}
+
+	/* XXX proper handling when only part of file was properly written */
+	if (modified) {
+		/* Non-root writes clear any setuid/setgid bits. */
+		if (resid > uio->uio_resid && ap->a_cred &&
+		    ap->a_cred->cr_uid != 0)
+			node->nn_inode.i_mode &= ~(ISUID | ISGID);
+
+		/* Grow the file if the write went past the old EOF. */
+		if (file_size < uio->uio_offset + uio->uio_resid) {
+			node->nn_inode.i_size = uio->uio_offset +
+			    uio->uio_resid;
+			node->nn_flags |= IN_CHANGE | IN_UPDATE;
+			vnode_pager_setsize(vp, uio->uio_offset +
+			    uio->uio_resid);
+			nandfs_itimes(vp);
+		}
+	}
+
+	DPRINTF(WRITE, ("%s: return:%d\n", __func__, error));
+
+	return (error);
+}
+
+/*
+ * VOP_CACHEDLOOKUP(9): resolve one pathname component in directory
+ * 'dvp'.  Handles the three classic cases -- '.', '..' and plain
+ * names -- and implements the CREATE/RENAME/DELETE lookup contracts
+ * (EJUSTRETURN, SAVENAME, sticky-bit checks) described in VOP_LOOKUP(9).
+ */
+static int
+nandfs_lookup(struct vop_cachedlookup_args *ap)
+{
+	struct vnode *dvp, **vpp;
+	struct componentname *cnp;
+	struct ucred *cred;
+	struct thread *td;
+	struct nandfs_node *dir_node, *node;
+	struct nandfsmount *nmp;
+	uint64_t ino, off;
+	const char *name;
+	int namelen, nameiop, islastcn, mounted_ro;
+	int error, found;
+
+	DPRINTF(VNCALL, ("%s\n", __func__));
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	*vpp = NULL;
+
+	cnp = ap->a_cnp;
+	cred = cnp->cn_cred;
+	td = cnp->cn_thread;
+
+	dir_node = VTON(dvp);
+	nmp = dir_node->nn_nmp;
+
+	/* Simplify/clarification flags */
+	nameiop = cnp->cn_nameiop;
+	islastcn = cnp->cn_flags & ISLASTCN;
+	mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY;
+
+	/*
+	 * If requesting a modify on the last path element on a read-only
+	 * filingsystem, reject lookup;
+	 */
+	if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME))
+		return (EROFS);
+
+	/* A directory with no links is being removed; nothing to find. */
+	if (dir_node->nn_inode.i_links_count == 0)
+		return (ENOENT);
+
+	/*
+	 * Obviously, the file is not (anymore) in the namecache, we have to
+	 * search for it. There are three basic cases: '.', '..' and others.
+	 *
+	 * Following the guidelines of VOP_LOOKUP manpage and tmpfs.
+	 */
+	error = 0;
+	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
+		DPRINTF(LOOKUP, ("\tlookup '.'\n"));
+		/* Special case 1 '.' */
+		VREF(dvp);
+		*vpp = dvp;
+		/* Done */
+	} else if (cnp->cn_flags & ISDOTDOT) {
+		/* Special case 2 '..' */
+		DPRINTF(LOOKUP, ("\tlookup '..'\n"));
+
+		/* Get our node */
+		name = "..";
+		namelen = 2;
+		error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino,
+		    &found, &off);
+		if (error)
+			goto out;
+		if (!found)
+			error = ENOENT;
+
+		/*
+		 * First unlock parent -- standard deadlock avoidance when
+		 * acquiring an ancestor vnode (lock order is parent first).
+		 */
+		VOP_UNLOCK(dvp, 0);
+
+		if (error == 0) {
+			DPRINTF(LOOKUP, ("\tfound '..'\n"));
+			/* Try to create/reuse the node */
+			error = nandfs_get_node(nmp, ino, &node);
+
+			if (!error) {
+				DPRINTF(LOOKUP,
+				    ("\tnode retrieved/created OK\n"));
+				*vpp = NTOV(node);
+			}
+		}
+
+		/* Try to relock parent */
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+	} else {
+		DPRINTF(LOOKUP, ("\tlookup file\n"));
+		/* All other files */
+		/* Look up filename in the directory returning its inode */
+		name = cnp->cn_nameptr;
+		namelen = cnp->cn_namelen;
+		error = nandfs_lookup_name_in_dir(dvp, name, namelen,
+		    &ino, &found, &off);
+		if (error)
+			goto out;
+		if (!found) {
+			DPRINTF(LOOKUP, ("\tNOT found\n"));
+			/*
+			 * UGH, didn't find name. If we're creating or
+			 * renaming on the last name this is OK and we ought
+			 * to return EJUSTRETURN if its allowed to be created.
+			 */
+			error = ENOENT;
+			if ((nameiop == CREATE || nameiop == RENAME) &&
+			    islastcn) {
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    td);
+				if (!error) {
+					/* keep the component name */
+					cnp->cn_flags |= SAVENAME;
+					error = EJUSTRETURN;
+				}
+			}
+			/* Done */
+		} else {
+			/* Whiteout entries hide the name from plain lookups. */
+			if (ino == NANDFS_WHT_INO)
+				cnp->cn_flags |= ISWHITEOUT;
+
+			if ((cnp->cn_flags & ISWHITEOUT) &&
+			    (nameiop == LOOKUP))
+				return (ENOENT);
+
+			if ((nameiop == DELETE) && islastcn) {
+				if ((cnp->cn_flags & ISWHITEOUT) &&
+				    (cnp->cn_flags & DOWHITEOUT)) {
+					cnp->cn_flags |= SAVENAME;
+					dir_node->nn_diroff = off;
+					return (EJUSTRETURN);
+				}
+
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    cnp->cn_thread);
+				if (error)
+					return (error);
+
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (!error) {
+					*vpp = NTOV(node);
+					node->nn_diroff = off;
+				}
+
+				/*
+				 * Sticky directory: only root, the directory
+				 * owner or the file owner may delete.
+				 * NOTE(review): if nandfs_get_node() failed
+				 * above, 'node' is not valid here and '*vpp'
+				 * is NULL, yet both are used -- verify this
+				 * path can't be reached on error.
+				 */
+				if ((dir_node->nn_inode.i_mode & ISVTX) &&
+				    cred->cr_uid != 0 &&
+				    cred->cr_uid != dir_node->nn_inode.i_uid &&
+				    node->nn_inode.i_uid != cred->cr_uid) {
+					vput(*vpp);
+					*vpp = NULL;
+					return (EPERM);
+				}
+			} else if ((nameiop == RENAME) && islastcn) {
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    cnp->cn_thread);
+				if (error)
+					return (error);
+
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (!error) {
+					*vpp = NTOV(node);
+					node->nn_diroff = off;
+				}
+			} else {
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (!error) {
+					*vpp = NTOV(node);
+					node->nn_diroff = off;
+				}
+			}
+		}
+	}
+
+out:
+	/*
+	 * Store result in the cache if requested. If we are creating a file,
+	 * the file might not be found and thus putting it into the namecache
+	 * might be seen as negative caching.
+	 */
+	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+		cache_enter(dvp, *vpp, cnp);
+
+	return (error);
+
+}
+
+/*
+ * VOP_GETATTR(9): fill *vap from the in-core inode.
+ *
+ * NOTE(review): va_atime is populated from i_mtime -- presumably the
+ * on-disk inode keeps no separate access time; confirm against the
+ * nandfs_inode layout.
+ */
+static int
+nandfs_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+
+	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
+	/* Flush pending timestamp flags into the inode first. */
+	nandfs_itimes(vp);
+
+	/* Basic info */
+	VATTR_NULL(vap);
+	vap->va_atime.tv_sec = inode->i_mtime;
+	vap->va_atime.tv_nsec = inode->i_mtime_nsec;
+	vap->va_mtime.tv_sec = inode->i_mtime;
+	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
+	vap->va_ctime.tv_sec = inode->i_ctime;
+	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
+	vap->va_type = IFTOVT(inode->i_mode);
+	vap->va_mode = inode->i_mode & ~S_IFMT;
+	vap->va_nlink = inode->i_links_count;
+	vap->va_uid = inode->i_uid;
+	vap->va_gid = inode->i_gid;
+	vap->va_rdev = inode->i_special;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = node->nn_ino;
+	vap->va_size = inode->i_size;
+	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
+	vap->va_gen = 0;
+	vap->va_flags = inode->i_flags;
+	vap->va_bytes = inode->i_blocks * vap->va_blocksize;
+	vap->va_filerev = 0;
+	vap->va_vaflags = 0;
+
+	return (0);
+}
+
+/*
+ * Discard all cached buffers of 'vp' whose logical block number is at
+ * or beyond 'nblks'.  Helper for nandfs_truncate(); the vnode must be
+ * locked by the caller.
+ */
+static int
+nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
+{
+	struct nandfs_device *nffsdev;
+	struct bufobj *bo;
+	struct buf *bp, *nbp;
+
+	bo = &vp->v_bufobj;
+	nffsdev = VTON(vp)->nn_nandfsdev;
+
+	ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
+restart:
+	BO_LOCK(bo);
+restart_locked:
+	/* Clean buffers: try-lock; on contention rescan the clean list. */
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
+		if (bp->b_lblkno < nblks)
+			continue;
+		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+			goto restart_locked;
+
+		bremfree(bp);
+		bp->b_flags |= (B_INVAL | B_RELBUF);
+		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+		BO_UNLOCK(bo);
+		brelse(bp);
+		BO_LOCK(bo);
+	}
+
+	/*
+	 * Dirty buffers: sleep for the lock (LK_INTERLOCK drops the bufobj
+	 * mutex); LK_SLEEPFAIL returns ENOLCK if the buffer changed while
+	 * we slept, in which case start over from the top.
+	 */
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+		if (bp->b_lblkno < nblks)
+			continue;
+		if (BUF_LOCK(bp,
+		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+		    BO_MTX(bo)) == ENOLCK)
+			goto restart;
+		bp->b_flags |= (B_INVAL | B_RELBUF);
+		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+		brelse(bp);
+		/* Keep the device's dirty-buffer accounting in sync. */
+		nandfs_dirty_bufs_decrement(nffsdev);
+		BO_LOCK(bo);
+	}
+
+	BO_UNLOCK(bo);
+
+	return (0);
+}
+
+/*
+ * Set the file length of 'vp' to 'newsize'.  Growth only updates the
+ * inode and VM object size; shrinking additionally zeroes the tail of
+ * the new last block, trims the bmap and drops now-stale buffers.
+ */
+static int
+nandfs_truncate(struct vnode *vp, uint64_t newsize)
+{
+	struct nandfs_device *nffsdev;
+	struct nandfs_node *node;
+	struct nandfs_inode *inode;
+	struct buf *bp = NULL;
+	uint64_t oblks, nblks, vblk, size, rest;
+	int error;
+
+	node = VTON(vp);
+	nffsdev = node->nn_nandfsdev;
+	inode = &node->nn_inode;
+
+	/* Calculate end of file */
+	size = inode->i_size;
+
+	/* Same size: just refresh the timestamps. */
+	if (newsize == size) {
+		node->nn_flags |= IN_CHANGE | IN_UPDATE;
+		nandfs_itimes(vp);
+		return (0);
+	}
+
+	/* Growing: no block work needed, the file is sparse-extended. */
+	if (newsize > size) {
+		inode->i_size = newsize;
+		vnode_pager_setsize(vp, newsize);
+		node->nn_flags |= IN_CHANGE | IN_UPDATE;
+		nandfs_itimes(vp);
+		return (0);
+	}
+
+	/* Shrinking: old and new block counts, and bytes kept in the
+	 * new last block. */
+	nblks = howmany(newsize, nffsdev->nd_blocksize);
+	oblks = howmany(size, nffsdev->nd_blocksize);
+	rest = newsize % nffsdev->nd_blocksize;
+
+	if (rest) {
+		/* Zero the tail of the new last block past 'rest'. */
+		error = nandfs_bmap_lookup(node, nblks - 1, &vblk);
+		if (error)
+			return (error);
+
+		if (vblk != 0)
+			error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp);
+		else
+			error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp);
+
+		if (error) {
+			if (bp)
+				brelse(bp);
+			return (error);
+		}
+
+		bzero((char *)bp->b_data + rest,
+		    (u_int)(nffsdev->nd_blocksize - rest));
+		error = nandfs_dirty_buf(bp, 0);
+		if (error)
+			return (error);
+	}
+
+	DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks,
+	    nblks));
+
+	/* Unmap the blocks beyond the new end of file. */
+	error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1);
+	if (error) {
+		if (bp)
+			nandfs_undirty_buf(bp);
+		return (error);
+	}
+
+	/* Drop cached buffers past the new last block. */
+	error = nandfs_vtruncbuf(vp, nblks);
+	if (error) {
+		if (bp)
+			nandfs_undirty_buf(bp);
+		return (error);
+	}
+
+	inode->i_size = newsize;
+	vnode_pager_setsize(vp, newsize);
+	node->nn_flags |= IN_CHANGE | IN_UPDATE;
+	nandfs_itimes(vp);
+
+	return (error);
+}
+
+/*
+ * Apply pending IN_ACCESS/IN_CHANGE/IN_UPDATE flags to the inode
+ * timestamps.  Caller holds the vnode interlock (see nandfs_itimes()).
+ */
+static void
+nandfs_itimes_locked(struct vnode *vp)
+{
+	struct nandfs_node *node;
+	struct nandfs_inode *inode;
+	struct timespec ts;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+
+	node = VTON(vp);
+	inode = &node->nn_inode;
+
+	/* Nothing pending: leave the inode untouched. */
+	if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
+		return;
+
+	/* Mark the inode dirty unless the fs is suspended and only
+	 * IN_ACCESS is pending. */
+	if (((vp->v_mount->mnt_kern_flag &
+	    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
+	    (node->nn_flags & (IN_CHANGE | IN_UPDATE)))
+		node->nn_flags |= IN_MODIFIED;
+
+	vfs_timestamp(&ts);
+	if (node->nn_flags & IN_UPDATE) {
+		inode->i_mtime = ts.tv_sec;
+		inode->i_mtime_nsec = ts.tv_nsec;
+	}
+	if (node->nn_flags & IN_CHANGE) {
+		inode->i_ctime = ts.tv_sec;
+		inode->i_ctime_nsec = ts.tv_nsec;
+	}
+
+	node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+}
+
+/*
+ * Public timestamp-update entry point: take the vnode interlock and
+ * apply any pending timestamp flags via nandfs_itimes_locked().
+ */
+void
+nandfs_itimes(struct vnode *vp)
+{
+
+	VI_LOCK(vp);
+	nandfs_itimes_locked(vp);
+	VI_UNLOCK(vp);
+}
+
+/*
+ * Change the mode bits of 'vp' to 'mode' after the usual privilege
+ * checks (sticky bit, setgid for non-members, setuid for non-owners).
+ * Helper for nandfs_setattr().
+ */
+static int
+nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
+{
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	uint16_t nmode;
+	int error = 0;
+
+	DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
+	    mode, cred, td));
+	/*
+	 * To modify the permissions on a file, must possess VADMIN
+	 * for that file.
+	 */
+	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
+		return (error);
+
+	/*
+	 * Privileged processes may set the sticky bit on non-directories,
+	 * as well as set the setgid bit on a file with a group that the
+	 * process is not a member of. Both of these are allowed in
+	 * jail(8).
+	 */
+	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
+		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
+			return (EFTYPE);
+	}
+	if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Deny setting setuid if we are not the file owner.
+	 */
+	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
+		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
+		if (error)
+			return (error);
+	}
+
+	/* Keep the file-type bits, replace only the permission bits. */
+	nmode = inode->i_mode;
+	nmode &= ~ALLPERMS;
+	nmode |= (mode & ALLPERMS);
+	inode->i_mode = nmode;
+	node->nn_flags |= IN_CHANGE;
+
+	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode));
+
+	return (error);
+}
+
+/*
+ * Change the owner and/or group of 'vp' after access and privilege
+ * checks.  Helper for nandfs_setattr().  VNOVAL for either id means
+ * "leave unchanged".
+ */
+static int
+nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *td)
+{
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	uid_t ouid;
+	gid_t ogid;
+	int error = 0;
+
+	if (uid == (uid_t)VNOVAL)
+		uid = inode->i_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = inode->i_gid;
+	/*
+	 * To modify the ownership of a file, the caller must have
+	 * VWRITE_OWNER rights on it (checked via VOP_ACCESSX()).
+	 */
+	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
+		return (error);
+	/*
+	 * To change the owner of a file, or change the group of a file to a
+	 * group of which we are not a member, the caller must have
+	 * privilege.
+	 */
+	if (((uid != inode->i_uid && uid != cred->cr_uid) ||
+	    (gid != inode->i_gid && !groupmember(gid, cred))) &&
+	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
+		return (error);
+	ogid = inode->i_gid;
+	ouid = inode->i_uid;
+
+	inode->i_gid = gid;
+	inode->i_uid = uid;
+
+	node->nn_flags |= IN_CHANGE;
+	/* Unprivileged ownership changes clear setuid/setgid bits. */
+	if ((inode->i_mode & (ISUID | ISGID)) &&
+	    (ouid != uid || ogid != gid)) {
+		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
+			inode->i_mode &= ~(ISUID | ISGID);
+		}
+	}
+	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
+	    cred, td));
+	return (0);
+}
+
+/*
+ * VOP_SETATTR(9): apply the settable fields of *vap to the inode --
+ * flags, size (via nandfs_truncate()), owner/group (nandfs_chown()),
+ * mode (nandfs_chmod()) and timestamps.  Rejects attributes that can
+ * never be set and all modifications on read-only mounts.
+ */
+static int
+nandfs_setattr(struct vop_setattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	struct vattr *vap = ap->a_vap;
+	struct ucred *cred = ap->a_cred;
+	struct thread *td = curthread;
+	uint32_t flags;
+	int error = 0;
+
+	/* Reject attributes that are never settable through setattr. */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
+		return (EINVAL);
+	}
+
+	if (vap->va_flags != VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
+		    td, vap->va_flags));
+
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/*
+		 * Callers may only modify the file flags on objects they
+		 * have VADMIN rights for.
+		 */
+		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
+			return (error);
+		/*
+		 * Unprivileged processes are not permitted to unset system
+		 * flags, or modify flags if any system flags are set.
+		 * Privileged non-jail processes may not modify system flags
+		 * if securelevel > 0 and any existing system flags are set.
+		 * Privileged jail processes behave like privileged non-jail
+		 * processes if the security.jail.chflags_allowed sysctl is
+		 * is non-zero; otherwise, they behave like unprivileged
+		 * processes.
+		 */
+
+		flags = inode->i_flags;
+		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
+			/* Privileged: may set any flag except SF_SNAPSHOT. */
+			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
+				error = securelevel_gt(cred, 0);
+				if (error)
+					return (error);
+			}
+			/* Snapshot flag cannot be set or cleared */
+			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
+			    (flags & SF_SNAPSHOT) == 0) ||
+			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
+			    (flags & SF_SNAPSHOT) != 0))
+				return (EPERM);
+
+			inode->i_flags = vap->va_flags;
+		} else {
+			/* Unprivileged: only user flags, and none at all
+			 * while system flags are in force. */
+			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
+			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
+				return (EPERM);
+
+			flags &= SF_SETTABLE;
+			flags |= (vap->va_flags & UF_SETTABLE);
+			inode->i_flags = flags;
+		}
+		node->nn_flags |= IN_CHANGE;
+		if (vap->va_flags & (IMMUTABLE | APPEND))
+			return (0);
+	}
+	/* No other attribute may change on an immutable/append-only file. */
+	if (inode->i_flags & (IMMUTABLE | APPEND))
+		return (EPERM);
+
+	if (vap->va_size != (u_quad_t)VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td,
+		    (uintmax_t)vap->va_size));
+
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			if ((inode->i_flags & SF_SNAPSHOT) != 0)
+				return (EPERM);
+			break;
+		default:
+			return (0);
+		}
+
+		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
+			return (EFBIG);
+
+		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
+		nandfs_truncate(vp, vap->va_size);
+		node->nn_flags |= IN_CHANGE;
+
+		return (0);
+	}
+
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
+		    vp, td, vap->va_uid, vap->va_gid));
+		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
+		if (error)
+			return (error);
+	}
+
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
+		    vap->va_mode));
+
+		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
+		if (error)
+			return (error);
+	}
+	if (vap->va_atime.tv_sec != VNOVAL ||
+	    vap->va_mtime.tv_sec != VNOVAL ||
+	    vap->va_birthtime.tv_sec != VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
+		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
+		    (uintmax_t)vap->va_mtime.tv_sec,
+		    (uintmax_t)vap->va_birthtime.tv_sec));
+
+		/* Record which timestamps to refresh, then apply them. */
+		if (vap->va_atime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_ACCESS;
+		if (vap->va_mtime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_CHANGE | IN_UPDATE;
+		if (vap->va_birthtime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_MODIFIED;
+		nandfs_itimes(vp);
+		return (0);
+	}
+
+	return (0);
+}
+
+/*
+ * VOP_OPEN(9): refuse device special files, enforce the append-only
+ * flag and make sure the backing VM object exists.
+ */
+static int
+nandfs_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+
+	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+
+	/* Append-only files may not be opened for plain writing. */
+	if ((node->nn_inode.i_flags & APPEND) &&
+	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
+		return (EPERM);
+
+	vnode_create_vobject(vp, node->nn_inode.i_size, ap->a_td);
+	return (0);
+}
+
+/*
+ * VOP_CLOSE(9): if other references still hold the vnode open, push
+ * any pending timestamp updates now.
+ *
+ * Use the VI_LOCK()/VI_UNLOCK() interlock wrappers for consistency
+ * with nandfs_itimes() instead of open-coding mtx_lock() on
+ * vp->v_interlock (behavior is identical).
+ */
+static int
+nandfs_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+
+	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
+
+	VI_LOCK(vp);
+	if (vp->v_usecount > 1)
+		nandfs_itimes_locked(vp);
+	VI_UNLOCK(vp);
+
+	return (0);
+}
+
+/*
+ * First half of the access check: is the requested operation possible
+ * at all on this node, regardless of credentials?
+ */
+static int
+nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
+{
+	int writing = (mode & VWRITE) != 0;
+
+	switch (vap->va_type) {
+	case VDIR:
+	case VLNK:
+	case VREG:
+		/* Regular objects: no writes on a read-only mount. */
+		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY))
+			return (EROFS);
+		break;
+	case VBLK:
+	case VCHR:
+	case VSOCK:
+	case VFIFO:
+		/*
+		 * Special nodes stay writable even on read-only mounts
+		 * when permissions allow it.
+		 */
+		break;
+	default:
+		/* Unknown vnode type. */
+		return (EINVAL);
+	}
+
+	/* Immutable files are never writable. */
+	if (writing && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
+		return (EPERM);
+
+	return (0);
+}
+
+/*
+ * Second half of the access check: defer to the generic vaccess()
+ * credential evaluation.
+ */
+static int
+nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
+    struct ucred *cred)
+{
+	int error;
+
+	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
+	    mode, cred, NULL);
+	return (error);
+}
+
+/* VOP_ADVLOCK(9): delegate to the generic local advisory-lock code. */
+static int
+nandfs_advlock(struct vop_advlock_args *ap)
+{
+	struct nandfs_node *node = VTON(ap->a_vp);
+	quad_t size = node->nn_inode.i_size;
+
+	return (lf_advlock(ap, &node->nn_lockf, size));
+}
+
+/*
+ * VOP_ACCESS(9): fetch current attributes, then run the two-stage
+ * check -- operation possible on this node, and permitted for the
+ * caller's credentials.
+ */
+static int
+nandfs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	accmode_t accmode = ap->a_accmode;
+	struct vattr vattr;
+	int error;
+
+	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));
+
+	error = VOP_GETATTR(vp, &vattr, NULL);
+	if (error != 0)
+		return (error);
+
+	error = nandfs_check_possible(vp, &vattr, accmode);
+	if (error != 0)
+		return (error);
+
+	return (nandfs_check_permitted(vp, &vattr, accmode, ap->a_cred));
+}
+
+/* VOP_PRINT(9): dump node identification for vn_printf()/ddb. */
+static int
+nandfs_print(struct vop_print_args *ap)
+{
+	struct nandfs_node *node = VTON(ap->a_vp);
+
+	printf("\tvp=%p, nandfs_node=%p\n", ap->a_vp, node);
+	printf("nandfs inode %#jx\n", (uintmax_t)node->nn_ino);
+	printf("flags = 0x%b\n", (u_int)node->nn_flags, PRINT_NODE_FLAGS);
+
+	return (0);
+}
+
+/*
+ * Translate a file-relative read buffer to a device block address and
+ * issue it to the underlying device.  Unmapped (hole) blocks are
+ * zero-filled instead.  All failure/completion paths call bufdone().
+ */
+static void
+nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
+{
+	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
+	struct buf *nbp;
+	nandfs_daddr_t vblk, pblk;
+	nandfs_lbn_t from;
+	uint32_t blocksize;
+	int error = 0;
+	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+
+	/*
+	 * Translate all the block sectors into a series of buffers to read
+	 * asynchronously from the nandfs device. Note that this lookup may
+	 * induce readin's too.
+	 */
+
+	blocksize = nandfsdev->nd_blocksize;
+	/* Only single-block buffers are handled here. */
+	if (bp->b_bcount / blocksize != 1)
+		panic("invalid b_count in bp %p\n", bp);
+
+	from = bp->b_blkno;
+
+	DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
+	    " count %#lx\n", (uintmax_t)node->nn_ino, from,
+	    bp->b_bcount));
+
+	/* Get virtual block numbers for the vnode's buffer span */
+	error = nandfs_bmap_lookup(node, from, &vblk);
+	if (error) {
+		bp->b_error = EINVAL;
+		bp->b_ioflags |= BIO_ERROR;
+		bufdone(bp);
+		return;
+	}
+
+	/* Translate virtual block numbers to physical block numbers */
+	error = nandfs_vtop(node, vblk, &pblk);
+	if (error) {
+		bp->b_error = EINVAL;
+		bp->b_ioflags |= BIO_ERROR;
+		bufdone(bp);
+		return;
+	}
+
+	/* Issue translated blocks */
+	bp->b_resid = bp->b_bcount;
+
+	/* Note virtual block 0 marks not mapped */
+	if (vblk == 0) {
+		vfs_bio_clrbuf(bp);
+		bufdone(bp);
+		return;
+	}
+
+	/* Rewrite the buffer to target the device block and issue it. */
+	nbp = bp;
+	nbp->b_blkno = pblk * blk2dev;
+	bp->b_iooffset = dbtob(nbp->b_blkno);
+	MPASS(bp->b_iooffset >= 0);
+	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
+	nandfs_vblk_set(bp, vblk);
+	DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
+	    "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
+	    (uintmax_t)(from), (uintmax_t)vblk,
+	    (uintmax_t)pblk, nbp));
+}
+
+/* Hand a write buffer straight down to the underlying device. */
+static void
+nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
+{
+	struct bufobj *bo = &node->nn_nandfsdev->nd_devvp->v_bufobj;
+
+	bp->b_iooffset = dbtob(bp->b_blkno);
+	MPASS(bp->b_iooffset >= 0);
+	BO_STRATEGY(bo, bp);
+}
+
+/*
+ * VOP_STRATEGY(9): dispatch buffer I/O.  Reads are translated and
+ * issued immediately; writes go to the segment-writer path.
+ */
+static int
+nandfs_strategy(struct vop_strategy_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct buf *bp = ap->a_bp;
+	struct nandfs_node *node = VTON(vp);
+
+	/* check if we ought to be here */
+	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
+	    ("nandfs_strategy on type %d", vp->v_type));
+
+	if (bp->b_iocmd == BIO_READ)
+		nandfs_read_filebuf(node, bp);
+	else
+		nandfs_write_filebuf(node, bp);
+
+	return (0);
+}
+
+/*
+ * VOP_READDIR(9): convert on-disk nandfs_dir_entry records, starting
+ * at uio_offset, into struct dirent records in the caller's uio.
+ * Deleted entries (inode == 0) are skipped but their record length
+ * still advances the offset.
+ */
+static int
+nandfs_readdir(struct vop_readdir_args *ap)
+{
+	struct uio *uio = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_dir_entry *ndirent;
+	struct dirent dirent;
+	struct buf *bp;
+	uint64_t file_size, diroffset, transoffset, blkoff;
+	uint64_t blocknr;
+	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
+	uint8_t *pos, name_len;
+	int error;
+
+	DPRINTF(READDIR, ("nandfs_readdir called\n"));
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	file_size = node->nn_inode.i_size;
+	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
+	    (uintmax_t)file_size, uio->uio_resid ));
+
+	/* We are called just as long as we keep on pushing data in */
+	error = 0;
+	if ((uio->uio_offset < file_size) &&
+	    (uio->uio_resid >= sizeof(struct dirent))) {
+		diroffset = uio->uio_offset;
+		transoffset = diroffset;
+
+		blocknr = diroffset / blocksize;
+		blkoff = diroffset % blocksize;
+		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (EIO);
+		}
+		while (diroffset < file_size) {
+			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
+			    diroffset));
+			/* Crossed a block boundary: fetch the next block. */
+			if (blkoff >= blocksize) {
+				blkoff = 0; blocknr++;
+				brelse(bp);
+				error = nandfs_bread(node, blocknr, NOCRED, 0,
+				    &bp);
+				if (error) {
+					brelse(bp);
+					return (EIO);
+				}
+			}
+
+			/* Read in one dirent */
+			pos = (uint8_t *)bp->b_data + blkoff;
+			ndirent = (struct nandfs_dir_entry *)pos;
+
+			/*
+			 * d_name is NUL-terminated because the memset
+			 * zeroed the whole struct before strncpy.
+			 */
+			name_len = ndirent->name_len;
+			memset(&dirent, 0, sizeof(struct dirent));
+			dirent.d_fileno = ndirent->inode;
+			if (dirent.d_fileno) {
+				dirent.d_type = ndirent->file_type;
+				dirent.d_namlen = name_len;
+				strncpy(dirent.d_name, ndirent->name, name_len);
+				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
+				DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
+				    name_len, dirent.d_name));
+			}
+
+			/*
+			 * If there isn't enough space in the uio to return a
+			 * whole dirent, break off read
+			 */
+			if (uio->uio_resid < GENERIC_DIRSIZ(&dirent))
+				break;
+
+			/* Transfer */
+			if (dirent.d_fileno)
+				uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio);
+
+			/*
+			 * Advance.  NOTE(review): a corrupt on-disk entry
+			 * with rec_len == 0 would loop forever here --
+			 * confirm rec_len is validated at write time.
+			 */
+			diroffset += ndirent->rec_len;
+			blkoff += ndirent->rec_len;
+
+			/* Remember the last entry we transfered */
+			transoffset = diroffset;
+		}
+		brelse(bp);
+
+		/* Pass on last transfered offset */
+		uio->uio_offset = transoffset;
+	}
+
+	if (ap->a_eofflag)
+		*ap->a_eofflag = (uio->uio_offset >= file_size);
+
+	return (error);
+}
+
+/*
+ * Scan directory 'dvp' and report whether it holds only the "." and
+ * ".." entries.  Returns 1 when empty, 0 on any other live entry or
+ * on a read error.
+ *
+ * Fix: the ".." test used '&&', so any 2-byte name with at least one
+ * '.' (e.g. ".a" or "a.") was misclassified as ".." -- both bytes
+ * must be '.', hence '||' in the not-dotdot test.
+ */
+static int
+nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred)
+{
+	struct nandfs_node *dnode = VTON(dvp);
+	struct nandfs_dir_entry *dirent;
+	uint64_t file_size = dnode->nn_inode.i_size;
+	uint64_t blockcount = dnode->nn_inode.i_blocks;
+	uint64_t blocknr;
+	uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize;
+	uint32_t limit;
+	uint32_t off;
+	uint8_t *pos;
+	struct buf *bp;
+	int error;
+
+	DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp,
+	    (uintmax_t)parentino, cred));
+
+	KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp));
+
+	blocknr = 0;
+	while (blocknr < blockcount) {
+		error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (0);
+		}
+
+		pos = (uint8_t *)bp->b_data;
+		off = 0;
+
+		/*
+		 * The last block may be partially filled.
+		 * NOTE(review): when file_size is an exact multiple of
+		 * blocksize this yields limit == 0 and the last block is
+		 * not scanned -- confirm directory sizes never align.
+		 */
+		if (blocknr == (blockcount - 1))
+			limit = file_size % blocksize;
+		else
+			limit = blocksize;
+
+		while (off < limit) {
+			dirent = (struct nandfs_dir_entry *)(pos + off);
+			off += dirent->rec_len;
+
+			/* Deleted entries don't count. */
+			if (dirent->inode == 0)
+				continue;
+
+			switch (dirent->name_len) {
+			case 0:
+				break;
+			case 1:
+				if (dirent->name[0] != '.')
+					goto notempty;
+
+				KASSERT(dirent->inode == dnode->nn_ino,
+				    (".'s inode does not match dir"));
+				break;
+			case 2:
+				/* Not ".." unless BOTH bytes are '.'. */
+				if (dirent->name[0] != '.' ||
+				    dirent->name[1] != '.')
+					goto notempty;
+
+				KASSERT(dirent->inode == parentino,
+				    ("..'s inode does not match parent"));
+				break;
+			default:
+				goto notempty;
+			}
+		}
+
+		brelse(bp);
+		blocknr++;
+	}
+
+	return (1);
+notempty:
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * VOP_LINK: create a hard link to vp under the name in cnp inside
+ * directory tdvp.  The link count is bumped before the directory entry
+ * is added and rolled back on failure.
+ */
+static int
+nandfs_link(struct vop_link_args *ap)
+{
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	int error;
+
+	/* Hard links may not cross mount points. */
+	if (tdvp->v_mount != vp->v_mount)
+		return (EXDEV);
+
+	if (inode->i_links_count >= LINK_MAX)
+		return (EMLINK);
+
+	if (inode->i_flags & (IMMUTABLE | APPEND))
+		return (EPERM);
+
+	/* Update link count */
+	inode->i_links_count++;
+
+	/* Add dir entry */
+	error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr,
+	    cnp->cn_namelen, IFTODT(inode->i_mode));
+	if (error) {
+		/* Roll back the link count bump. */
+		inode->i_links_count--;
+	}
+
+	node->nn_flags |= IN_CHANGE;
+	nandfs_itimes(vp);
+	DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n",
+	    __func__, tdvp, vp, cnp));
+
+	/*
+	 * The original returned 0 unconditionally, hiding a failed
+	 * nandfs_add_dirent() from the caller; propagate the error.
+	 */
+	return (error);
+}
+
+/*
+ * VOP_CREATE: allocate a new inode with mode derived from the vattr and
+ * link it into parent directory dvp under the name in cnp.  On success
+ * *vpp holds the new vnode; on failure the fresh node is destroyed.
+ */
+static int
+nandfs_create(struct vop_create_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfsmount *nmp = dir_node->nn_nmp;
+	struct nandfs_node *node;
+	int error;
+
+	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
+
+	/* Refuse to allocate when the device reports itself full. */
+	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+		return (ENOSPC);
+
+	/* Create new vnode/inode */
+	error = nandfs_node_create(nmp, &node, mode);
+	if (error)
+		return (error);
+	/* Group is inherited from the parent, owner from the caller. */
+	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+	/* Add new dir entry */
+	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+	    cnp->cn_namelen, IFTODT(mode));
+	if (error) {
+		/* Undo the allocation; log if the node cannot be freed. */
+		if (nandfs_node_destroy(node)) {
+			nandfs_error("%s: error destroying node %p\n",
+			    __func__, node);
+		}
+		return (error);
+	}
+	*vpp = NTOV(node);
+
+	DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node,
+	    (uintmax_t)node->nn_ino));
+	return (0);
+}
+
+/*
+ * VOP_REMOVE: unlink a non-directory vp from parent directory dvp.
+ * The inode itself is reclaimed later, once the link count drops to
+ * zero and the vnode becomes inactive.
+ */
+static int
+nandfs_remove(struct vop_remove_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *dvp = ap->a_dvp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_node *dnode = VTON(dvp);
+	struct componentname *cnp = ap->a_cnp;
+
+	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
+	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
+	    node->nn_inode.i_links_count));
+
+	/* Directories are removed through VOP_RMDIR instead. */
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	/* Files marked as immutable or append-only cannot be deleted. */
+	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
+	    (dnode->nn_inode.i_flags & APPEND))
+		return (EPERM);
+
+	nandfs_remove_dirent(dvp, node, cnp);
+	node->nn_inode.i_links_count--;
+	node->nn_flags |= IN_CHANGE;
+
+	return (0);
+}
+
+/*
+ * Check if source directory is in the path of the target directory.
+ * Target is supplied locked, source is unlocked.
+ * The target is always vput before returning.
+ *
+ * Walks from dest towards the root by repeatedly reading the ".."
+ * entry of the current directory, failing with EINVAL if src is
+ * encountered on the way (which would create a cycle).
+ */
+static int
+nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
+    struct ucred *cred)
+{
+	struct vnode *vp;
+	int error, rootino;
+	struct nandfs_dir_entry dirent;
+
+	vp = NTOV(dest);
+	if (src->nn_ino == dest->nn_ino) {
+		error = EEXIST;
+		goto out;
+	}
+	rootino = NANDFS_ROOT_INO;
+	error = 0;
+	/* The root can never be below src. */
+	if (dest->nn_ino == rootino)
+		goto out;
+
+	for (;;) {
+		if (vp->v_type != VDIR) {
+			error = ENOTDIR;
+			break;
+		}
+
+		/*
+		 * NOTE(review): this reads the first record of the
+		 * directory (offset 0) and expects it to be ".." --
+		 * presumably the on-disk layout guarantees that; verify
+		 * against nandfs_init_dir().
+		 */
+		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
+		    NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
+		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
+		    NULL, NULL);
+		if (error != 0)
+			break;
+		if (dirent.name_len != 2 ||
+		    dirent.name[0] != '.' ||
+		    dirent.name[1] != '.') {
+			error = ENOTDIR;
+			break;
+		}
+		/* Found src on the way up: rename would create a loop. */
+		if (dirent.inode == src->nn_ino) {
+			error = EINVAL;
+			break;
+		}
+		if (dirent.inode == rootino)
+			break;
+		/* Drop the current level and climb to its parent. */
+		vput(vp);
+		if ((error = VFS_VGET(vp->v_mount, dirent.inode,
+		    LK_EXCLUSIVE, &vp)) != 0) {
+			vp = NULL;
+			break;
+		}
+	}
+
+out:
+	if (error == ENOTDIR)
+		printf("checkpath: .. not a directory\n");
+	if (vp != NULL)
+		vput(vp);
+	return (error);
+}
+
+/*
+ * VOP_RENAME: move fvp (name fcnp in directory fdvp) to name tcnp in
+ * directory tdvp, replacing tvp if it exists.  Follows the classic
+ * UFS rename protocol: pre-bump the source link count, enter the new
+ * name, then relookup and remove the old name, fixing up ".." and the
+ * parents' link counts when a directory changes parent.  All four
+ * vnode references are released on every exit path.
+ */
+static int
+nandfs_rename(struct vop_rename_args *ap)
+{
+	struct vnode *tvp = ap->a_tvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *fdvp = ap->a_fdvp;
+	struct componentname *tcnp = ap->a_tcnp;
+	struct componentname *fcnp = ap->a_fcnp;
+	int doingdirectory = 0, oldparent = 0, newparent = 0;
+	int error = 0;
+
+	struct nandfs_node *fdnode, *fnode, *fnode1;
+	struct nandfs_node *tdnode = VTON(tdvp);
+	struct nandfs_node *tnode;
+
+	uint32_t tdflags, fflags, fdflags;
+	uint16_t mode;
+
+	DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
+	    fvp, tdvp, tvp));
+
+	/*
+	 * Check for cross-device rename.
+	 */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+abortit:
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		vrele(fdvp);
+		vrele(fvp);
+		return (error);
+	}
+
+	tdflags = tdnode->nn_inode.i_flags;
+	if (tvp &&
+	    ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+	    (tdflags & APPEND))) {
+		error = EPERM;
+		goto abortit;
+	}
+
+	/*
+	 * Renaming a file to itself has no effect.  The upper layers should
+	 * not call us in that case.  Temporarily just warn if they do.
+	 */
+	if (fvp == tvp) {
+		printf("nandfs_rename: fvp == tvp (can't happen)\n");
+		error = 0;
+		goto abortit;
+	}
+
+	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
+		goto abortit;
+
+	fdnode = VTON(fdvp);
+	fnode = VTON(fvp);
+
+	if (fnode->nn_inode.i_links_count >= LINK_MAX) {
+		VOP_UNLOCK(fvp, 0);
+		error = EMLINK;
+		goto abortit;
+	}
+
+	fflags = fnode->nn_inode.i_flags;
+	fdflags = fdnode->nn_inode.i_flags;
+
+	if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+	    (fdflags & APPEND)) {
+		VOP_UNLOCK(fvp, 0);
+		error = EPERM;
+		goto abortit;
+	}
+
+	mode = fnode->nn_inode.i_mode;
+	if ((mode & S_IFMT) == S_IFDIR) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 */
+
+		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+		    (fdvp == fvp) ||
+		    ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
+		    (fnode->nn_flags & IN_RENAME)) {
+			VOP_UNLOCK(fvp, 0);
+			error = EINVAL;
+			goto abortit;
+		}
+		/* IN_RENAME guards against a concurrent rename of this dir. */
+		fnode->nn_flags |= IN_RENAME;
+		doingdirectory = 1;
+		DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
+		    tdvp));
+		oldparent = fdnode->nn_ino;
+	}
+
+	vrele(fdvp);
+
+	tnode = NULL;
+	if (tvp)
+		tnode = VTON(tvp);
+
+	/*
+	 * Bump link count on fvp while we are moving stuff around.  If we
+	 * crash before completing the work, the link count may be wrong
+	 * but correctable.
+	 */
+	fnode->nn_inode.i_links_count++;
+
+	/* Check for in path moving XXX */
+	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
+	VOP_UNLOCK(fvp, 0);
+	if (oldparent != tdnode->nn_ino)
+		newparent = tdnode->nn_ino;
+	if (doingdirectory && newparent) {
+		if (error)	/* write access check above */
+			goto bad;
+		if (tnode != NULL)
+			vput(tvp);
+
+		/* Ensure tdvp is not a descendant of fvp. */
+		error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
+		if (error)
+			goto out;
+
+		/* checkpath dropped tdvp; look the target up again. */
+		VREF(tdvp);
+		error = relookup(tdvp, &tvp, tcnp);
+		if (error)
+			goto out;
+		vrele(tdvp);
+		tdnode = VTON(tdvp);
+		tnode = NULL;
+		if (tvp)
+			tnode = VTON(tvp);
+	}
+
+	/*
+	 * If the target doesn't exist, link the target to the source and
+	 * unlink the source.  Otherwise, rewrite the target directory to
+	 * reference the source and remove the original entry.
+	 */
+
+	if (tvp == NULL) {
+		/*
+		 * Account for ".." in new directory.
+		 */
+		if (doingdirectory && fdvp != tdvp)
+			tdnode->nn_inode.i_links_count++;
+
+		DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
+		/*
+		 * Add name in new directory.
+		 */
+		error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr,
+		    tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode));
+		if (error) {
+			if (doingdirectory && fdvp != tdvp)
+				tdnode->nn_inode.i_links_count--;
+			goto bad;
+		}
+
+		vput(tdvp);
+	} else {
+		/*
+		 * If the parent directory is "sticky", then the user must
+		 * own the parent directory, or the destination of the rename,
+		 * otherwise the destination may not be changed (except by
+		 * root).  This implements append-only directories.
+		 */
+		if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
+		    tcnp->cn_cred->cr_uid != 0 &&
+		    tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
+		    tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
+			error = EPERM;
+			goto bad;
+		}
+		/*
+		 * Target must be empty if a directory and have no links
+		 * to it.  Also, ensure source and target are compatible
+		 * (both directories, or both not directories).
+		 */
+		mode = tnode->nn_inode.i_mode;
+		if ((mode & S_IFMT) == S_IFDIR) {
+			if (!nandfs_dirempty(tvp, tdnode->nn_ino,
+			    tcnp->cn_cred)) {
+				error = ENOTEMPTY;
+				goto bad;
+			}
+			if (!doingdirectory) {
+				error = ENOTDIR;
+				goto bad;
+			}
+			/*
+			 * Update name cache since directory is going away.
+			 */
+			cache_purge(tdvp);
+		} else if (doingdirectory) {
+			error = EISDIR;
+			goto bad;
+		}
+
+		DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
+		/*
+		 * Change name tcnp in tdvp to point at fvp.
+		 */
+		error = nandfs_update_dirent(tdvp, fnode, tnode);
+		if (error)
+			goto bad;
+
+		if (doingdirectory && !newparent)
+			tdnode->nn_inode.i_links_count--;
+
+		vput(tdvp);
+
+		tnode->nn_inode.i_links_count--;
+		vput(tvp);
+		tnode = NULL;
+	}
+
+	/*
+	 * Unlink the source.
+	 */
+	fcnp->cn_flags &= ~MODMASK;
+	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+	VREF(fdvp);
+	error = relookup(fdvp, &fvp, fcnp);
+	if (error == 0)
+		vrele(fdvp);
+	if (fvp != NULL) {
+		fnode1 = VTON(fvp);
+		fdnode = VTON(fdvp);
+	} else {
+		/*
+		 * From name has disappeared.
+		 */
+		if (doingdirectory)
+			panic("nandfs_rename: lost dir entry");
+		vrele(ap->a_fvp);
+		return (0);
+	}
+
+	DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));
+
+	/*
+	 * Ensure that the directory entry still exists and has not
+	 * changed while the new name has been entered.  If the source is
+	 * a file then the entry may have been unlinked or renamed.  In
+	 * either case there is no further work to be done.  If the source
+	 * is a directory then it cannot have been rmdir'ed; its link
+	 * count of three would cause a rmdir to fail with ENOTEMPTY.
+	 * The IN_RENAME flag ensures that it cannot be moved by another
+	 * rename.
+	 */
+	if (fnode != fnode1) {
+		if (doingdirectory)
+			panic("nandfs: lost dir entry");
+	} else {
+		/*
+		 * If the source is a directory with a
+		 * new parent, the link count of the old
+		 * parent directory must be decremented
+		 * and ".." set to point to the new parent.
+		 */
+		if (doingdirectory && newparent) {
+			DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
+			    __func__, (uintmax_t) oldparent,
+			    (uintmax_t) newparent));
+			error = nandfs_update_parent_dir(fvp, newparent);
+			if (!error) {
+				fdnode->nn_inode.i_links_count--;
+				fdnode->nn_flags |= IN_CHANGE;
+			}
+		}
+		/* Drop the old name and the pre-bumped link count. */
+		error = nandfs_remove_dirent(fdvp, fnode, fcnp);
+		if (!error) {
+			fnode->nn_inode.i_links_count--;
+			fnode->nn_flags |= IN_CHANGE;
+		}
+		fnode->nn_flags &= ~IN_RENAME;
+	}
+	if (fdnode)
+		vput(fdvp);
+	if (fnode)
+		vput(fvp);
+	vrele(ap->a_fvp);
+	return (error);
+
+bad:
+	DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
+	if (tnode)
+		vput(NTOV(tnode));
+	vput(NTOV(tdnode));
+out:
+	if (doingdirectory)
+		fnode->nn_flags &= ~IN_RENAME;
+	/* Undo the early link count bump on the source. */
+	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
+		fnode->nn_inode.i_links_count--;
+		fnode->nn_flags |= IN_CHANGE;
+		fnode->nn_flags &= ~IN_RENAME;
+		vput(fvp);
+	} else
+		vrele(fvp);
+	return (error);
+}
+
+/*
+ * VOP_MKDIR: create a new directory in dvp, add its entry, bump the
+ * parent's link count for the new ".." back-pointer and populate the
+ * new directory with "." and "..".
+ */
+static int
+nandfs_mkdir(struct vop_mkdir_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfs_inode *dir_inode = &dir_node->nn_inode;
+	struct nandfs_node *node;
+	struct nandfsmount *nmp = dir_node->nn_nmp;
+	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	int error;
+
+	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
+
+	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+		return (ENOSPC);
+
+	/* The new ".." would push the parent past LINK_MAX. */
+	if (dir_inode->i_links_count >= LINK_MAX)
+		return (EMLINK);
+
+	error = nandfs_node_create(nmp, &node, mode);
+	if (error)
+		return (error);
+
+	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+	*vpp = NTOV(node);
+
+	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+	    cnp->cn_namelen, IFTODT(mode));
+	if (error) {
+		vput(*vpp);
+		return (error);
+	}
+
+	/* Parent gains a link via the child's ".." entry. */
+	dir_node->nn_inode.i_links_count++;
+	dir_node->nn_flags |= IN_CHANGE;
+
+	/*
+	 * NOTE(review): if nandfs_init_dir() fails, the parent's link
+	 * count bump and the directory entry added above are not rolled
+	 * back -- looks like a leak on this error path; confirm.
+	 */
+	error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino);
+	if (error) {
+		vput(NTOV(node));
+		return (error);
+	}
+
+	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node,
+	    (uintmax_t)node->nn_ino));
+	return (0);
+}
+
+/*
+ * VOP_MKNOD: create a device special file (or socket/fifo) in dvp.
+ * The device number, when supplied, is stored in i_special.
+ */
+static int
+nandfs_mknod(struct vop_mknod_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vattr *vap = ap->a_vap;
+	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	struct componentname *cnp = ap->a_cnp;
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfsmount *nmp = dir_node->nn_nmp;
+	struct nandfs_node *node;
+	int error;
+
+	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+		return (ENOSPC);
+
+	error = nandfs_node_create(nmp, &node, mode);
+	if (error)
+		return (error);
+	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+	/* VNOVAL means "no device number supplied". */
+	if (vap->va_rdev != VNOVAL)
+		node->nn_inode.i_special = vap->va_rdev;
+
+	*vpp = NTOV(node);
+
+	/*
+	 * NOTE(review): the real nandfs_add_dirent() error is discarded
+	 * and reported as ENOTDIR -- presumably a shortcut; consider
+	 * propagating the original error instead.
+	 */
+	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+	    cnp->cn_namelen, IFTODT(mode))) {
+		vput(*vpp);
+		return (ENOTDIR);
+	}
+
+	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+
+	return (0);
+}
+
+/*
+ * VOP_SYMLINK: create a symbolic link in dvp whose contents are the
+ * path string ap->a_target, stored as the file's data.
+ */
+static int
+nandfs_symlink(struct vop_symlink_args *ap)
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	struct componentname *cnp = ap->a_cnp;
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfsmount *nmp = dir_node->nn_nmp;
+	struct nandfs_node *node;
+	int len, error;
+
+	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+		return (ENOSPC);
+
+	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
+	if (error)
+		return (error);
+	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+	*vpp = NTOV(node);
+
+	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+	    cnp->cn_namelen, IFTODT(mode))) {
+		vput(*vpp);
+		return (ENOTDIR);
+	}
+
+
+	/* Write the link target as the symlink's file contents. */
+	len = strlen(ap->a_target);
+	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
+	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
+	    cnp->cn_cred, NOCRED, NULL, NULL);
+	if (error)
+		vput(*vpp);
+
+	return (error);
+}
+
+/*
+ * VOP_READLINK: the link target is stored as regular file data, so a
+ * plain VOP_READ of the vnode returns it.
+ */
+static int
+nandfs_readlink(struct vop_readlink_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
+}
+
+/*
+ * VOP_RMDIR: remove the empty directory vp from parent dvp, adjusting
+ * both link counts and truncating the victim's data blocks.
+ */
+static int
+nandfs_rmdir(struct vop_rmdir_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct nandfs_node *node, *dnode;
+	int error = 0;
+
+	node = VTON(vp);
+	dnode = VTON(dvp);
+
+	/* Files marked as immutable or append-only cannot be deleted. */
+	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
+	    (dnode->nn_inode.i_flags & APPEND))
+		return (EPERM);
+
+	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
+	    dvp, vp, node, (uintmax_t)node->nn_ino));
+
+	/* A sane directory holds at least "." and "..". */
+	if (node->nn_inode.i_links_count < 2)
+		return (EINVAL);
+
+	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
+		return (ENOTEMPTY);
+
+	/*
+	 * The original repeated the immutable/append-only check here
+	 * with identical flags; the duplicate (and its dflag/flag
+	 * locals) has been removed.
+	 */
+
+	/* Cannot remove a mount point. */
+	if (vp->v_mountedhere != 0)
+		return (EINVAL);
+
+	nandfs_remove_dirent(dvp, node, cnp);
+	/* Parent loses the victim's ".." back-reference. */
+	dnode->nn_inode.i_links_count -= 1;
+	dnode->nn_flags |= IN_CHANGE;
+
+	cache_purge(dvp);
+
+	/* Release the directory's data blocks. */
+	error = nandfs_truncate(vp, (uint64_t)0);
+	if (error)
+		return (error);
+
+	/* Drop "." and the parent directory's entry reference. */
+	node->nn_inode.i_links_count -= 2;
+	node->nn_flags |= IN_CHANGE;
+
+	cache_purge(vp);
+
+	return (error);
+}
+
+/*
+ * VOP_FSYNC: kick the nandfs syncer thread and wait for it to run.
+ * The vnode lock is dropped around the wait to let the syncer make
+ * progress, then re-taken at the caller's original lock level.
+ */
+static int
+nandfs_fsync(struct vop_fsync_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	int locked;
+
+	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
+	    node, (uintmax_t)node->nn_ino));
+
+	/*
+	 * Start syncing vnode only if inode was modified or
+	 * there are some dirty buffers
+	 */
+	if (VTON(vp)->nn_flags & IN_MODIFIED ||
+	    vp->v_bufobj.bo_dirty.bv_cnt) {
+		locked = VOP_ISLOCKED(vp);
+		VOP_UNLOCK(vp, 0);
+		nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
+		VOP_LOCK(vp, locked | LK_RETRY);
+	}
+
+	return (0);
+}
+
+/*
+ * VOP_BMAP: translate logical block a_bn of the vnode into a physical
+ * block number on the underlying device, expressed in DEV_BSIZE units.
+ * No read-ahead/read-behind clustering is offered (a_runp/a_runb = 0).
+ */
+static int
+nandfs_bmap(struct vop_bmap_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *nnode = VTON(vp);
+	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
+	nandfs_daddr_t l2vmap, v2pmap;
+	int error;
+	/* Conversion factor from filesystem blocks to DEV_BSIZE sectors. */
+	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+
+	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
+	    nnode, (uintmax_t)nnode->nn_ino));
+
+	if (ap->a_bop != NULL)
+		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
+	if (ap->a_bnp == NULL)
+		return (0);
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;
+	if (ap->a_runb != NULL)
+		*ap->a_runb = 0;
+
+	/*
+	 * Translate all the block sectors into a series of buffers to read
+	 * asynchronously from the nandfs device.  Note that this lookup may
+	 * induce readin's too.
+	 */
+
+	/* Get virtual block numbers for the vnode's buffer span */
+	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
+	if (error)
+		/* NOTE(review): -1 is not an errno; verify callers. */
+		return (-1);
+
+	/* Translate virtual block numbers to physical block numbers */
+	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
+	if (error)
+		return (-1);
+
+	/* Note virtual block 0 marks not mapped */
+	if (l2vmap == 0)
+		*ap->a_bnp = -1;
+	else
+		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */
+
+	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
+	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn,
+	    (uintmax_t)*ap->a_bnp ));
+
+	return (0);
+}
+
+/*
+ * Request an immediate sync pass and block until the syncer thread has
+ * completed it.  Used by the ioctls that mutate checkpoint state.
+ */
+static void
+nandfs_force_syncer(struct nandfsmount *nmp)
+{
+
+	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
+	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE);
+}
+
+/*
+ * VOP_IOCTL: dispatch the nandfs checkpoint/segment management ioctls.
+ * Requires PRIV_VFS_MOUNT; on a read-only mount only the GET_* query
+ * commands are admitted.  Commands that change checkpoint state force
+ * a syncer pass afterwards so the change reaches the media.
+ */
+static int
+nandfs_ioctl(struct vop_ioctl_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	u_long command = ap->a_command;
+	caddr_t data = ap->a_data;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
+	struct nandfsmount *nmp = node->nn_nmp;
+	uint64_t *tab, *cno;
+	struct nandfs_seg_stat *nss;
+	struct nandfs_cpmode *ncpm;
+	struct nandfs_argv *nargv;
+	struct nandfs_cpstat *ncp;
+	int error;
+
+	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command));
+
+	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
+	if (error)
+		return (error);
+
+	/* Read-only mounts: allow queries, reject anything mutating. */
+	if (nmp->nm_ronly) {
+		switch (command) {
+		case NANDFS_IOCTL_GET_FSINFO:
+		case NANDFS_IOCTL_GET_SUSTAT:
+		case NANDFS_IOCTL_GET_CPINFO:
+		case NANDFS_IOCTL_GET_CPSTAT:
+		case NANDFS_IOCTL_GET_SUINFO:
+		case NANDFS_IOCTL_GET_VINFO:
+		case NANDFS_IOCTL_GET_BDESCS:
+			break;
+		default:
+			return (EROFS);
+		}
+	}
+
+	switch (command) {
+	case NANDFS_IOCTL_GET_FSINFO:
+		error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data);
+		break;
+	case NANDFS_IOCTL_GET_SUSTAT:
+		nss = (struct nandfs_seg_stat *)data;
+		error = nandfs_get_seg_stat(nandfsdev, nss);
+		break;
+	case NANDFS_IOCTL_CHANGE_CPMODE:
+		ncpm = (struct nandfs_cpmode *)data;
+		error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm);
+		nandfs_force_syncer(nmp);
+		break;
+	case NANDFS_IOCTL_GET_CPINFO:
+		nargv = (struct nandfs_argv *)data;
+		error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv);
+		break;
+	case NANDFS_IOCTL_DELETE_CP:
+		/* tab[0]..tab[1] is the checkpoint number range to drop. */
+		tab = (uint64_t *)data;
+		error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]);
+		nandfs_force_syncer(nmp);
+		break;
+	case NANDFS_IOCTL_GET_CPSTAT:
+		ncp = (struct nandfs_cpstat *)data;
+		error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp);
+		break;
+	case NANDFS_IOCTL_GET_SUINFO:
+		nargv = (struct nandfs_argv *)data;
+		error = nandfs_get_segment_info_ioctl(nandfsdev, nargv);
+		break;
+	case NANDFS_IOCTL_GET_VINFO:
+		nargv = (struct nandfs_argv *)data;
+		error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv);
+		break;
+	case NANDFS_IOCTL_GET_BDESCS:
+		nargv = (struct nandfs_argv *)data;
+		error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv);
+		break;
+	case NANDFS_IOCTL_SYNC:
+		/* Sync now and hand back the resulting checkpoint number. */
+		cno = (uint64_t *)data;
+		nandfs_force_syncer(nmp);
+		*cno = nandfsdev->nd_last_cno;
+		error = 0;
+		break;
+	case NANDFS_IOCTL_MAKE_SNAP:
+		cno = (uint64_t *)data;
+		error = nandfs_make_snap(nandfsdev, cno);
+		nandfs_force_syncer(nmp);
+		break;
+	case NANDFS_IOCTL_DELETE_SNAP:
+		cno = (uint64_t *)data;
+		error = nandfs_delete_snap(nandfsdev, *cno);
+		nandfs_force_syncer(nmp);
+		break;
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Whiteout vnode call: create, look up or delete a whiteout directory
+ * entry (inode NANDFS_WHT_INO, type DT_WHT) used by union mounts.
+ */
+static int
+nandfs_whiteout(struct vop_whiteout_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	int error = 0;
+
+	switch (ap->a_flags) {
+	case LOOKUP:
+		return (0);
+	case CREATE:
+		/* Create a new directory whiteout */
+#ifdef INVARIANTS
+		if ((cnp->cn_flags & SAVENAME) == 0)
+			panic("nandfs_whiteout: missing name");
+#endif
+		error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr,
+		    cnp->cn_namelen, DT_WHT);
+		break;
+
+	case DELETE:
+		/* Remove an existing directory whiteout */
+		cnp->cn_flags &= ~DOWHITEOUT;
+		error = nandfs_remove_dirent(dvp, NULL, cnp);
+		break;
+	default:
+		/*
+		 * Panic strings fixed: the originals said "ufs_whiteout"
+		 * (copy-paste from UFS) and "nandf_whiteout" (typo).
+		 */
+		panic("nandfs_whiteout: unknown op: %d", ap->a_flags);
+	}
+
+	return (error);
+}
+
+/*
+ * VOP_PATHCONF: report static filesystem limits and capabilities for
+ * pathconf(2)/fpathconf(2).  Unknown names return EINVAL.
+ */
+static int
+nandfs_pathconf(struct vop_pathconf_args *ap)
+{
+	int error;
+
+	error = 0;
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		break;
+	case _PC_NAME_MAX:
+		*ap->a_retval = NAME_MAX;
+		break;
+	case _PC_PATH_MAX:
+		*ap->a_retval = PATH_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		break;
+	case _PC_NO_TRUNC:
+		/* Long names are rejected, never silently truncated. */
+		*ap->a_retval = 1;
+		break;
+	case _PC_ACL_EXTENDED:
+		*ap->a_retval = 0;
+		break;
+	case _PC_ALLOC_SIZE_MIN:
+		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
+		break;
+	case _PC_FILESIZEBITS:
+		*ap->a_retval = 64;
+		break;
+	case _PC_REC_INCR_XFER_SIZE:
+		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+		break;
+	case _PC_REC_MAX_XFER_SIZE:
+		*ap->a_retval = -1;	/* means ``unlimited'' */
+		break;
+	case _PC_REC_MIN_XFER_SIZE:
+		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * VOP_LOCK1: acquire the device-wide nandfs write lock before the
+ * standard vnode lock, so vnode operations are serialized against the
+ * segment writer.  The vnode interlock is dropped around the sleepable
+ * device lock and re-taken afterwards.
+ */
+static int
+nandfs_vnlock1(struct vop_lock1_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	int error, vi_locked;
+
+	/*
+	 * XXX can vnode go away while we are sleeping?
+	 */
+	vi_locked = mtx_owned(&vp->v_interlock);
+	if (vi_locked)
+		VI_UNLOCK(vp);
+	error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
+	    ap->a_flags & LK_NOWAIT);
+	if (vi_locked && !error)
+		VI_LOCK(vp);
+	if (error)
+		return (error);
+
+	error = vop_stdlock(ap);
+	if (error) {
+		/* Do not hold the device lock without the vnode lock. */
+		NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * VOP_UNLOCK: release the standard vnode lock, then the device-wide
+ * write lock taken in nandfs_vnlock1() (reverse acquisition order).
+ */
+static int
+nandfs_vnunlock(struct vop_unlock_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	int error;
+
+	error = vop_stdunlock(ap);
+	if (error)
+		return (error);
+
+	NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
+
+	return (0);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * Vnode operations vector for regular (user-visible) nandfs vnodes.
+ * Lookups go through the name cache (vfs_cache_lookup) before hitting
+ * nandfs_lookup; locking is overridden to take the device write lock.
+ */
+struct vop_vector nandfs_vnodeops = {
+	.vop_default =		&default_vnodeops,
+	.vop_access =		nandfs_access,
+	.vop_advlock =		nandfs_advlock,
+	.vop_bmap =		nandfs_bmap,
+	.vop_close =		nandfs_close,
+	.vop_create =		nandfs_create,
+	.vop_fsync =		nandfs_fsync,
+	.vop_getattr =		nandfs_getattr,
+	.vop_inactive =		nandfs_inactive,
+	.vop_cachedlookup =	nandfs_lookup,
+	.vop_ioctl =		nandfs_ioctl,
+	.vop_link =		nandfs_link,
+	.vop_lookup =		vfs_cache_lookup,
+	.vop_mkdir =		nandfs_mkdir,
+	.vop_mknod =		nandfs_mknod,
+	.vop_open =		nandfs_open,
+	.vop_pathconf =		nandfs_pathconf,
+	.vop_print =		nandfs_print,
+	.vop_read =		nandfs_read,
+	.vop_readdir =		nandfs_readdir,
+	.vop_readlink =		nandfs_readlink,
+	.vop_reclaim =		nandfs_reclaim,
+	.vop_remove =		nandfs_remove,
+	.vop_rename =		nandfs_rename,
+	.vop_rmdir =		nandfs_rmdir,
+	.vop_whiteout =		nandfs_whiteout,
+	.vop_write =		nandfs_write,
+	.vop_setattr =		nandfs_setattr,
+	.vop_strategy =		nandfs_strategy,
+	.vop_symlink =		nandfs_symlink,
+	.vop_lock1 =		nandfs_vnlock1,
+	.vop_unlock =		nandfs_vnunlock,
+};
+
+/*
+ * Vnode operations vector for internal metadata vnodes (DAT, cpfile,
+ * sufile, ...).  Only buffer-cache related operations are implemented;
+ * everything user-facing panics, as these vnodes must never be reached
+ * through the namespace.
+ */
+struct vop_vector nandfs_system_vnodeops = {
+	.vop_default =		&default_vnodeops,
+	.vop_close =		nandfs_close,
+	.vop_inactive =		nandfs_inactive,
+	.vop_reclaim =		nandfs_reclaim,
+	.vop_strategy =		nandfs_strategy,
+	.vop_fsync =		nandfs_fsync,
+	.vop_bmap =		nandfs_bmap,
+	.vop_access =		VOP_PANIC,
+	.vop_advlock =		VOP_PANIC,
+	.vop_create =		VOP_PANIC,
+	.vop_getattr =		VOP_PANIC,
+	.vop_cachedlookup =	VOP_PANIC,
+	.vop_ioctl =		VOP_PANIC,
+	.vop_link =		VOP_PANIC,
+	.vop_lookup =		VOP_PANIC,
+	.vop_mkdir =		VOP_PANIC,
+	.vop_mknod =		VOP_PANIC,
+	.vop_open =		VOP_PANIC,
+	.vop_pathconf =		VOP_PANIC,
+	.vop_print =		VOP_PANIC,
+	.vop_read =		VOP_PANIC,
+	.vop_readdir =		VOP_PANIC,
+	.vop_readlink =		VOP_PANIC,
+	.vop_remove =		VOP_PANIC,
+	.vop_rename =		VOP_PANIC,
+	.vop_rmdir =		VOP_PANIC,
+	.vop_whiteout =		VOP_PANIC,
+	.vop_write =		VOP_PANIC,
+	.vop_setattr =		VOP_PANIC,
+	.vop_symlink =		VOP_PANIC,
+};
+
+/*
+ * VOP_CLOSE for fifos: update timestamps while other users remain,
+ * then defer to the generic fifo close.
+ */
+static int
+nandfsfifo_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+
+	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
+
+	mtx_lock(&vp->v_interlock);
+	/* Only touch times if this is not the last reference. */
+	if (vp->v_usecount > 1)
+		nandfs_itimes_locked(vp);
+	mtx_unlock(&vp->v_interlock);
+
+	return (fifo_specops.vop_close(ap));
+}
+
+/*
+ * Vnode operations vector for fifo vnodes: fifo_specops handles the
+ * pipe semantics; attribute, lifecycle and locking operations are
+ * provided by nandfs.  Data read/write through the vnode panics.
+ */
+struct vop_vector nandfs_fifoops = {
+	.vop_default =		&fifo_specops,
+	.vop_fsync =		VOP_PANIC,
+	.vop_access =		nandfs_access,
+	.vop_close =		nandfsfifo_close,
+	.vop_getattr =		nandfs_getattr,
+	.vop_inactive =		nandfs_inactive,
+	.vop_print =		nandfs_print,
+	.vop_read =		VOP_PANIC,
+	.vop_reclaim =		nandfs_reclaim,
+	.vop_setattr =		nandfs_setattr,
+	.vop_write =		VOP_PANIC,
+	.vop_lock1 =		nandfs_vnlock1,
+	.vop_unlock =		nandfs_vnunlock,
+};
+
+/*
+ * Finish initializing a freshly allocated nandfs vnode: set VV_ROOT
+ * on the root, derive v_type from the on-disk mode (the GC inode is
+ * forced to VREG since it carries no real mode), and switch fifos to
+ * the fifo operations vector.
+ */
+int
+nandfs_vinit(struct vnode *vp, uint64_t ino)
+{
+	struct nandfs_node *node;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	node = VTON(vp);
+
+	/* Check if we're fetching the root */
+	if (ino == NANDFS_ROOT_INO)
+		vp->v_vflag |= VV_ROOT;
+
+	if (ino != NANDFS_GC_INO)
+		vp->v_type = IFTOVT(node->nn_inode.i_mode);
+	else
+		vp->v_type = VREG;
+
+	if (vp->v_type == VFIFO)
+		vp->v_op = &nandfs_fifoops;
+
+	return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfs/nfs_commonacl.c
--- a/head/sys/fs/nfs/nfs_commonacl.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfs/nfs_commonacl.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfs/nfs_commonacl.c 224086 2011-07-16 08:51:09Z zack $");
+__FBSDID("$FreeBSD: head/sys/fs/nfs/nfs_commonacl.c 235568 2012-05-17 21:52:17Z rmacklem $");
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>
@@ -468,9 +468,7 @@
error = NFSERR_ATTRNOTSUPP;
goto out;
}
- error = VOP_ACLCHECK(vp, ACL_TYPE_NFS4, aclp, cred, p);
- if (!error)
- error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
+ error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
out:
NFSEXITCODE(error);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsclient/nfs_clbio.c
--- a/head/sys/fs/nfsclient/nfs_clbio.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsclient/nfs_clbio.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clbio.c 233101 2012-03-17 23:03:20Z kib $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clbio.c 237987 2012-07-02 09:53:08Z kib $");
#include "opt_kdtrace.h"
@@ -281,7 +281,11 @@
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
- cred = curthread->td_ucred; /* XXX */
+ /* Set the cred to n_writecred for the write rpcs. */
+ if (np->n_writecred != NULL)
+ cred = crhold(np->n_writecred);
+ else
+ cred = crhold(curthread->td_ucred); /* XXX */
nmp = VFSTONFS(vp->v_mount);
pages = ap->a_m;
count = ap->a_count;
@@ -345,6 +349,7 @@
iomode = NFSWRITE_FILESYNC;
error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
+ crfree(cred);
pmap_qremove(kva, npages);
relpbuf(bp, &ncl_pbuf_freecnt);
@@ -717,7 +722,7 @@
};
if (n > 0) {
- error = uiomove(bp->b_data + on, (int)n, uio);
+ error = vn_io_fault_uiomove(bp->b_data + on, (int)n, uio);
}
if (vp->v_type == VLNK)
n = 0;
@@ -892,8 +897,9 @@
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn;
int bcount;
- int n, on, error = 0;
- off_t tmp_off;
+ int bp_cached, n, on, error = 0, error1;
+ size_t orig_resid, local_resid;
+ off_t orig_size, tmp_off;
KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode"));
KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
@@ -945,6 +951,11 @@
mtx_unlock(&np->n_mtx);
}
+ orig_resid = uio->uio_resid;
+ mtx_lock(&np->n_mtx);
+ orig_size = np->n_size;
+ mtx_unlock(&np->n_mtx);
+
/*
* If IO_APPEND then load uio_offset. We restart here if we cannot
* get the append lock.
@@ -1122,7 +1133,10 @@
* normally.
*/
+ bp_cached = 1;
if (on == 0 && n == bcount) {
+ if ((bp->b_flags & B_CACHE) == 0)
+ bp_cached = 0;
bp->b_flags |= B_CACHE;
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
@@ -1173,7 +1187,7 @@
* significant cache coherency problems with multiple clients,
* especially if locking is implemented later on.
*
- * as an optimization we could theoretically maintain
+ * As an optimization we could theoretically maintain
* a linked list of discontinuous areas, but we would still
* have to commit them separately so there isn't much
* advantage to it except perhaps a bit of asynchronization.
@@ -1188,7 +1202,23 @@
goto again;
}
- error = uiomove((char *)bp->b_data + on, n, uio);
+ local_resid = uio->uio_resid;
+ error = vn_io_fault_uiomove((char *)bp->b_data + on, n, uio);
+
+ if (error != 0 && !bp_cached) {
+ /*
+ * This block has no other content then what
+ * possibly was written by the faulty uiomove.
+ * Release it, forgetting the data pages, to
+ * prevent the leak of uninitialized data to
+ * usermode.
+ */
+ bp->b_ioflags |= BIO_ERROR;
+ brelse(bp);
+ uio->uio_offset -= local_resid - uio->uio_resid;
+ uio->uio_resid = local_resid;
+ break;
+ }
/*
* Since this block is being modified, it must be written
@@ -1198,17 +1228,18 @@
*/
bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
- if (error) {
- bp->b_ioflags |= BIO_ERROR;
- brelse(bp);
- break;
- }
+ /*
+ * Get the partial update on the progress made from
+ * uiomove, if an error occured.
+ */
+ if (error != 0)
+ n = local_resid - uio->uio_resid;
/*
* Only update dirtyoff/dirtyend if not a degenerate
* condition.
*/
- if (n) {
+ if (n > 0) {
if (bp->b_dirtyend > 0) {
bp->b_dirtyoff = min(on, bp->b_dirtyoff);
bp->b_dirtyend = max((on + n), bp->b_dirtyend);
@@ -1228,17 +1259,34 @@
if ((ioflag & IO_SYNC)) {
if (ioflag & IO_INVAL)
bp->b_flags |= B_NOCACHE;
- error = bwrite(bp);
- if (error)
+ error1 = bwrite(bp);
+ if (error1 != 0) {
+ if (error == 0)
+ error = error1;
break;
+ }
} else if ((n + on) == biosize) {
bp->b_flags |= B_ASYNC;
(void) ncl_writebp(bp, 0, NULL);
} else {
bdwrite(bp);
}
+
+ if (error != 0)
+ break;
} while (uio->uio_resid > 0 && n > 0);
+ if (error != 0) {
+ if (ioflag & IO_UNIT) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = orig_size;
+ /* IO_SYNC is handled implicitly */
+ (void)VOP_SETATTR(vp, &vattr, cred);
+ uio->uio_offset -= orig_resid - uio->uio_resid;
+ uio->uio_resid = orig_resid;
+ }
+ }
+
return (error);
}
@@ -1817,7 +1865,7 @@
* truncation point. We may have a B_DELWRI and/or B_CACHE
* buffer that now needs to be truncated.
*/
- error = vtruncbuf(vp, cred, td, nsize, biosize);
+ error = vtruncbuf(vp, cred, nsize, biosize);
lbn = nsize / biosize;
bufsize = nsize & (biosize - 1);
bp = nfs_getcacheblk(vp, lbn, bufsize, td);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsclient/nfs_clnode.c
--- a/head/sys/fs/nfsclient/nfs_clnode.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsclient/nfs_clnode.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clnode.c 230605 2012-01-27 02:46:12Z rmacklem $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clnode.c 237244 2012-06-18 22:17:28Z rmacklem $");
#include "opt_kdtrace.h"
@@ -210,18 +210,28 @@
struct nfsnode *np;
struct sillyrename *sp;
struct vnode *vp = ap->a_vp;
+ boolean_t retv;
np = VTONFS(vp);
if (NFS_ISV4(vp) && vp->v_type == VREG) {
/*
* Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
- * Close operations are delayed until now. Any dirty buffers
- * must be flushed before the close, so that the stateid is
- * available for the writes.
+ * Close operations are delayed until now. Any dirty
+ * buffers/pages must be flushed before the close, so that the
+ * stateid is available for the writes.
*/
- (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
- (void) nfsrpc_close(vp, 1, ap->a_td);
+ if (vp->v_object != NULL) {
+ VM_OBJECT_LOCK(vp->v_object);
+ retv = vm_object_page_clean(vp->v_object, 0, 0,
+ OBJPC_SYNC);
+ VM_OBJECT_UNLOCK(vp->v_object);
+ } else
+ retv = TRUE;
+ if (retv == TRUE) {
+ (void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
+ (void)nfsrpc_close(vp, 1, ap->a_td);
+ }
}
mtx_lock(&np->n_mtx);
@@ -257,15 +267,6 @@
struct nfsnode *np = VTONFS(vp);
struct nfsdmap *dp, *dp2;
- if (NFS_ISV4(vp) && vp->v_type == VREG)
- /*
- * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
- * Close operations are delayed until ncl_inactive().
- * However, since VOP_INACTIVE() is not guaranteed to be
- * called, we need to do it again here.
- */
- (void) nfsrpc_close(vp, 1, ap->a_td);
-
/*
* If the NLM is running, give it a chance to abort pending
* locks.
@@ -278,6 +279,15 @@
*/
vnode_destroy_vobject(vp);
+ if (NFS_ISV4(vp) && vp->v_type == VREG)
+ /*
+ * We can now safely close any remaining NFSv4 Opens for
+ * this file. Most opens will have already been closed by
+ * ncl_inactive(), but there are cases where it is not
+ * called, so we need to do it again here.
+ */
+ (void) nfsrpc_close(vp, 1, ap->a_td);
+
vfs_hash_remove(vp);
/*
@@ -300,6 +310,8 @@
FREE((caddr_t)dp2, M_NFSDIROFF);
}
}
+ if (np->n_writecred != NULL)
+ crfree(np->n_writecred);
FREE((caddr_t)np->n_fhp, M_NFSFH);
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsclient/nfs_clvfsops.c
--- a/head/sys/fs/nfsclient/nfs_clvfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsclient/nfs_clvfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 234386 2012-04-17 16:28:22Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 237367 2012-06-21 09:26:06Z kib $");
#include "opt_bootp.h"
@@ -1136,7 +1136,8 @@
out:
if (!error) {
MNT_ILOCK(mp);
- mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
+ mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
+ MNTK_NO_IOPF;
MNT_IUNLOCK(mp);
}
return (error);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsclient/nfs_clvnops.c
--- a/head/sys/fs/nfsclient/nfs_clvnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsclient/nfs_clvnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 233101 2012-03-17 23:03:20Z kib $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 235332 2012-05-12 12:02:51Z rmacklem $");
/*
* vnode op calls for Sun NFS version 2, 3 and 4
@@ -513,6 +513,7 @@
struct vattr vattr;
int error;
int fmode = ap->a_mode;
+ struct ucred *cred;
if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
return (EOPNOTSUPP);
@@ -604,7 +605,22 @@
}
np->n_directio_opens++;
}
+
+ /*
+ * If this is an open for writing, capture a reference to the
+ * credentials, so they can be used by ncl_putpages(). Using
+ * these write credentials is preferable to the credentials of
+ * whatever thread happens to be doing the VOP_PUTPAGES() since
+ * the write RPCs are less likely to fail with EACCES.
+ */
+ if ((fmode & FWRITE) != 0) {
+ cred = np->n_writecred;
+ np->n_writecred = crhold(ap->a_cred);
+ } else
+ cred = NULL;
mtx_unlock(&np->n_mtx);
+ if (cred != NULL)
+ crfree(cred);
vnode_create_vobject(vp, vattr.va_size, ap->a_td);
return (0);
}
@@ -1546,7 +1562,10 @@
(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
if (!error) {
newvp = NFSTOV(np);
- if (attrflag)
+ if (attrflag == 0)
+ error = nfsrpc_getattr(newvp, cnp->cn_cred,
+ cnp->cn_thread, &nfsva, NULL);
+ if (error == 0)
error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
0, 1);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsclient/nfsnode.h
--- a/head/sys/fs/nfsclient/nfsnode.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsclient/nfsnode.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/nfsclient/nfsnode.h 230394 2012-01-20 20:02:01Z jhb $
+ * $FreeBSD: head/sys/fs/nfsclient/nfsnode.h 235332 2012-05-12 12:02:51Z rmacklem $
*/
#ifndef _NFSCLIENT_NFSNODE_H_
@@ -123,6 +123,7 @@
int n_directio_asyncwr;
u_int64_t n_change; /* old Change attribute */
struct nfsv4node *n_v4; /* extra V4 stuff */
+ struct ucred *n_writecred; /* Cred. for putpages */
};
#define n_atim n_un1.nf_atim
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsserver/nfs_nfsdport.c
--- a/head/sys/fs/nfsserver/nfs_nfsdport.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsserver/nfs_nfsdport.c Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 234482 2012-04-20 06:50:44Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 235136 2012-05-08 03:39:44Z jwd $");
#include <sys/capability.h>
@@ -505,11 +505,10 @@
out:
if (error) {
- uma_zfree(namei_zone, cnp->cn_pnbuf);
+ nfsvno_relpathbuf(ndp);
ndp->ni_vp = NULL;
ndp->ni_dvp = NULL;
ndp->ni_startdir = NULL;
- cnp->cn_flags &= ~HASBUF;
} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
ndp->ni_dvp = NULL;
}
@@ -1047,6 +1046,8 @@
else
vput(ndp->ni_dvp);
vput(vp);
+ if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
+ nfsvno_relpathbuf(ndp);
NFSEXITCODE(error);
return (error);
}
@@ -1086,6 +1087,8 @@
else
vput(ndp->ni_dvp);
vput(vp);
+ if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
+ nfsvno_relpathbuf(ndp);
NFSEXITCODE(error);
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nfsserver/nfs_nfsdstate.c
--- a/head/sys/fs/nfsserver/nfs_nfsdstate.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nfsserver/nfs_nfsdstate.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdstate.c 231949 2012-02-21 01:05:12Z kib $");
+__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdstate.c 235381 2012-05-12 22:20:55Z rmacklem $");
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>
@@ -331,11 +331,13 @@
* Must wait until any outstanding callback on the old clp
* completes.
*/
+ NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void) tsleep((caddr_t)clp, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
"nfsd clp", 10 * hz);
}
+ NFSUNLOCKSTATE();
nfsrv_zapclient(clp, p);
*new_clpp = NULL;
goto out;
@@ -385,10 +387,13 @@
* Must wait until any outstanding callback on the old clp
* completes.
*/
+ NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void) tsleep((caddr_t)clp, PZERO - 1, "nfsd clp", 10 * hz);
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp",
+ 10 * hz);
}
+ NFSUNLOCKSTATE();
nfsrv_zapclient(clp, p);
*new_clpp = NULL;
@@ -3816,11 +3821,9 @@
clp->lc_cbref--;
if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
clp->lc_flags &= ~LCL_WAKEUPWANTED;
- NFSUNLOCKSTATE();
- wakeup((caddr_t)clp);
- } else {
- NFSUNLOCKSTATE();
+ wakeup(clp);
}
+ NFSUNLOCKSTATE();
NFSEXITCODE(error);
return (error);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ntfs/ntfs.h
--- a/head/sys/fs/ntfs/ntfs.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ntfs/ntfs.h Wed Jul 25 16:40:53 2012 +0300
@@ -25,16 +25,16 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/ntfs/ntfs.h 232100 2012-02-24 07:30:44Z kevlo $
+ * $FreeBSD: head/sys/fs/ntfs/ntfs.h 236140 2012-05-27 09:34:47Z ed $
*/
/*#define NTFS_DEBUG 1*/
-typedef u_int64_t cn_t;
-typedef u_int16_t wchar;
+typedef uint64_t cn_t;
+typedef uint16_t wchar;
#pragma pack(1)
-#define BBSIZE 1024
+#define BBSIZE 1024
#define BBOFF ((off_t)(0))
#define BBLOCK 0
#define NTFS_MFTINO 0
@@ -45,157 +45,157 @@
#define NTFS_BOOTINO 7
#define NTFS_BADCLUSINO 8
#define NTFS_UPCASEINO 10
-#define NTFS_MAXFILENAME 255
+#define NTFS_MAXFILENAME 255
struct fixuphdr {
- u_int32_t fh_magic;
- u_int16_t fh_foff;
- u_int16_t fh_fnum;
+ uint32_t fh_magic;
+ uint16_t fh_foff;
+ uint16_t fh_fnum;
};
-#define NTFS_AF_INRUN 0x00000001
+#define NTFS_AF_INRUN 0x00000001
struct attrhdr {
- u_int32_t a_type;
- u_int32_t reclen;
- u_int8_t a_flag;
- u_int8_t a_namelen;
- u_int8_t a_nameoff;
- u_int8_t reserved1;
- u_int8_t a_compression;
- u_int8_t reserved2;
- u_int16_t a_index;
+ uint32_t a_type;
+ uint32_t reclen;
+ uint8_t a_flag;
+ uint8_t a_namelen;
+ uint8_t a_nameoff;
+ uint8_t reserved1;
+ uint8_t a_compression;
+ uint8_t reserved2;
+ uint16_t a_index;
};
-#define NTFS_A_STD 0x10
-#define NTFS_A_ATTRLIST 0x20
-#define NTFS_A_NAME 0x30
-#define NTFS_A_VOLUMENAME 0x60
-#define NTFS_A_DATA 0x80
+#define NTFS_A_STD 0x10
+#define NTFS_A_ATTRLIST 0x20
+#define NTFS_A_NAME 0x30
+#define NTFS_A_VOLUMENAME 0x60
+#define NTFS_A_DATA 0x80
#define NTFS_A_INDXROOT 0x90
#define NTFS_A_INDX 0xA0
-#define NTFS_A_INDXBITMAP 0xB0
+#define NTFS_A_INDXBITMAP 0xB0
-#define NTFS_MAXATTRNAME 255
+#define NTFS_MAXATTRNAME 255
struct attr {
- struct attrhdr a_hdr;
+ struct attrhdr a_hdr;
union {
struct {
- u_int16_t a_datalen;
- u_int16_t reserved1;
- u_int16_t a_dataoff;
- u_int16_t a_indexed;
- } a_S_r;
+ uint16_t a_datalen;
+ uint16_t reserved1;
+ uint16_t a_dataoff;
+ uint16_t a_indexed;
+ } a_S_r;
struct {
- cn_t a_vcnstart;
- cn_t a_vcnend;
- u_int16_t a_dataoff;
- u_int16_t a_compressalg;
- u_int32_t reserved1;
- u_int64_t a_allocated;
- u_int64_t a_datalen;
- u_int64_t a_initialized;
- } a_S_nr;
- } a_S;
+ cn_t a_vcnstart;
+ cn_t a_vcnend;
+ uint16_t a_dataoff;
+ uint16_t a_compressalg;
+ uint32_t reserved1;
+ uint64_t a_allocated;
+ uint64_t a_datalen;
+ uint64_t a_initialized;
+ } a_S_nr;
+ } a_S;
};
-#define a_r a_S.a_S_r
-#define a_nr a_S.a_S_nr
+#define a_r a_S.a_S_r
+#define a_nr a_S.a_S_nr
typedef struct {
- u_int64_t t_create;
- u_int64_t t_write;
- u_int64_t t_mftwrite;
- u_int64_t t_access;
-} ntfs_times_t;
+ uint64_t t_create;
+ uint64_t t_write;
+ uint64_t t_mftwrite;
+ uint64_t t_access;
+} ntfs_times_t;
-#define NTFS_FFLAG_RDONLY 0x01LL
-#define NTFS_FFLAG_HIDDEN 0x02LL
-#define NTFS_FFLAG_SYSTEM 0x04LL
-#define NTFS_FFLAG_ARCHIVE 0x20LL
-#define NTFS_FFLAG_COMPRESSED 0x0800LL
-#define NTFS_FFLAG_DIR 0x10000000LL
+#define NTFS_FFLAG_RDONLY 0x01LL
+#define NTFS_FFLAG_HIDDEN 0x02LL
+#define NTFS_FFLAG_SYSTEM 0x04LL
+#define NTFS_FFLAG_ARCHIVE 0x20LL
+#define NTFS_FFLAG_COMPRESSED 0x0800LL
+#define NTFS_FFLAG_DIR 0x10000000LL
struct attr_name {
- u_int32_t n_pnumber; /* Parent ntnode */
- u_int32_t reserved;
- ntfs_times_t n_times;
- u_int64_t n_size;
- u_int64_t n_attrsz;
- u_int64_t n_flag;
- u_int8_t n_namelen;
- u_int8_t n_nametype;
- u_int16_t n_name[1];
+ uint32_t n_pnumber; /* Parent ntnode */
+ uint32_t reserved;
+ ntfs_times_t n_times;
+ uint64_t n_size;
+ uint64_t n_attrsz;
+ uint64_t n_flag;
+ uint8_t n_namelen;
+ uint8_t n_nametype;
+ uint16_t n_name[1];
};
-#define NTFS_IRFLAG_INDXALLOC 0x00000001
+#define NTFS_IRFLAG_INDXALLOC 0x00000001
struct attr_indexroot {
- u_int32_t ir_unkn1; /* always 0x30 */
- u_int32_t ir_unkn2; /* always 0x1 */
- u_int32_t ir_size;/* ??? */
- u_int32_t ir_unkn3; /* number of cluster */
- u_int32_t ir_unkn4; /* always 0x10 */
- u_int32_t ir_datalen; /* sizeof simething */
- u_int32_t ir_allocated; /* same as above */
- u_int16_t ir_flag;/* ?? always 1 */
- u_int16_t ir_unkn7;
+ uint32_t ir_unkn1; /* always 0x30 */
+ uint32_t ir_unkn2; /* always 0x1 */
+ uint32_t ir_size;/* ??? */
+ uint32_t ir_unkn3; /* number of cluster */
+ uint32_t ir_unkn4; /* always 0x10 */
+ uint32_t ir_datalen; /* sizeof something */
+ uint32_t ir_allocated; /* same as above */
+ uint16_t ir_flag;/* ?? always 1 */
+ uint16_t ir_unkn7;
};
struct attr_attrlist {
- u_int32_t al_type; /* Attribute type */
- u_int16_t reclen; /* length of this entry */
- u_int8_t al_namelen; /* Attribute name len */
- u_int8_t al_nameoff; /* Name offset from entry start */
- u_int64_t al_vcnstart; /* VCN number */
- u_int32_t al_inumber; /* Parent ntnode */
- u_int32_t reserved;
- u_int16_t al_index; /* Attribute index in MFT record */
- u_int16_t al_name[1]; /* Name */
+ uint32_t al_type; /* Attribute type */
+ uint16_t reclen; /* length of this entry */
+ uint8_t al_namelen; /* Attribute name len */
+ uint8_t al_nameoff; /* Name offset from entry start */
+ uint64_t al_vcnstart; /* VCN number */
+ uint32_t al_inumber; /* Parent ntnode */
+ uint32_t reserved;
+ uint16_t al_index; /* Attribute index in MFT record */
+ uint16_t al_name[1]; /* Name */
};
-#define NTFS_INDXMAGIC (u_int32_t)(0x58444E49)
+#define NTFS_INDXMAGIC (uint32_t)(0x58444E49)
struct attr_indexalloc {
struct fixuphdr ia_fixup;
- u_int64_t unknown1;
- cn_t ia_bufcn;
- u_int16_t ia_hdrsize;
- u_int16_t unknown2;
- u_int32_t ia_inuse;
- u_int32_t ia_allocated;
+ uint64_t unknown1;
+ cn_t ia_bufcn;
+ uint16_t ia_hdrsize;
+ uint16_t unknown2;
+ uint32_t ia_inuse;
+ uint32_t ia_allocated;
};
#define NTFS_IEFLAG_SUBNODE 0x00000001
#define NTFS_IEFLAG_LAST 0x00000002
struct attr_indexentry {
- u_int32_t ie_number;
- u_int32_t unknown1;
- u_int16_t reclen;
- u_int16_t ie_size;
- u_int32_t ie_flag;/* 1 - has subnodes, 2 - last */
- u_int32_t ie_fpnumber;
- u_int32_t unknown2;
- ntfs_times_t ie_ftimes;
- u_int64_t ie_fallocated;
- u_int64_t ie_fsize;
- u_int64_t ie_fflag;
- u_int8_t ie_fnamelen;
- u_int8_t ie_fnametype;
- wchar ie_fname[NTFS_MAXFILENAME];
+ uint32_t ie_number;
+ uint32_t unknown1;
+ uint16_t reclen;
+ uint16_t ie_size;
+ uint32_t ie_flag; /* 1 - has subnodes, 2 - last */
+ uint32_t ie_fpnumber;
+ uint32_t unknown2;
+ ntfs_times_t ie_ftimes;
+ uint64_t ie_fallocated;
+ uint64_t ie_fsize;
+ uint64_t ie_fflag;
+ uint8_t ie_fnamelen;
+ uint8_t ie_fnametype;
+ wchar ie_fname[NTFS_MAXFILENAME];
/* cn_t ie_bufcn; buffer with subnodes */
};
-#define NTFS_FILEMAGIC (u_int32_t)(0x454C4946)
+#define NTFS_FILEMAGIC (uint32_t)(0x454C4946)
#define NTFS_BLOCK_SIZE 512
#define NTFS_FRFLAG_DIR 0x0002
struct filerec {
- struct fixuphdr fr_fixup;
- u_int8_t reserved[8];
- u_int16_t fr_seqnum; /* Sequence number */
- u_int16_t fr_nlink;
- u_int16_t fr_attroff; /* offset to attributes */
- u_int16_t fr_flags; /* 1-nonresident attr, 2-directory */
- u_int32_t fr_size;/* hdr + attributes */
- u_int32_t fr_allocated; /* allocated length of record */
- u_int64_t fr_mainrec; /* main record */
- u_int16_t fr_attrnum; /* maximum attr number + 1 ??? */
+ struct fixuphdr fr_fixup;
+ uint8_t reserved[8];
+ uint16_t fr_seqnum; /* Sequence number */
+ uint16_t fr_nlink;
+ uint16_t fr_attroff; /* offset to attributes */
+ uint16_t fr_flags; /* 1-nonresident attr, 2-directory */
+ uint32_t fr_size;/* hdr + attributes */
+ uint32_t fr_allocated; /* allocated length of record */
+ uint64_t fr_mainrec; /* main record */
+ uint16_t fr_attrnum; /* maximum attr number + 1 ??? */
};
#define NTFS_ATTRNAME_MAXLEN 0x40
@@ -203,66 +203,66 @@
#define NTFS_ADFLAG_INDEX 0x0002 /* Attrib can be indexed */
struct attrdef {
wchar ad_name[NTFS_ATTRNAME_MAXLEN];
- u_int32_t ad_type;
- u_int32_t reserved1[2];
- u_int32_t ad_flag;
- u_int64_t ad_minlen;
- u_int64_t ad_maxlen; /* -1 for nonlimited */
+ uint32_t ad_type;
+ uint32_t reserved1[2];
+ uint32_t ad_flag;
+ uint64_t ad_minlen;
+ uint64_t ad_maxlen; /* -1 for nonlimited */
};
struct ntvattrdef {
char ad_name[0x40];
int ad_namelen;
- u_int32_t ad_type;
+ uint32_t ad_type;
};
#define NTFS_BBID "NTFS "
#define NTFS_BBIDLEN 8
struct bootfile {
- u_int8_t reserved1[3]; /* asm jmp near ... */
- u_int8_t bf_sysid[8]; /* 'NTFS ' */
- u_int16_t bf_bps; /* bytes per sector */
- u_int8_t bf_spc; /* sectors per cluster */
- u_int8_t reserved2[7]; /* unused (zeroed) */
- u_int8_t bf_media; /* media desc. (0xF8) */
- u_int8_t reserved3[2];
- u_int16_t bf_spt; /* sectors per track */
- u_int16_t bf_heads; /* number of heads */
- u_int8_t reserver4[12];
- u_int64_t bf_spv; /* sectors per volume */
- cn_t bf_mftcn; /* $MFT cluster number */
- cn_t bf_mftmirrcn; /* $MFTMirr cn */
- u_int8_t bf_mftrecsz; /* MFT record size (clust) */
+ uint8_t reserved1[3]; /* asm jmp near ... */
+ uint8_t bf_sysid[8]; /* 'NTFS ' */
+ uint16_t bf_bps; /* bytes per sector */
+ uint8_t bf_spc; /* sectors per cluster */
+ uint8_t reserved2[7]; /* unused (zeroed) */
+ uint8_t bf_media; /* media desc. (0xF8) */
+ uint8_t reserved3[2];
+ uint16_t bf_spt; /* sectors per track */
+ uint16_t bf_heads; /* number of heads */
+ uint8_t reserver4[12];
+ uint64_t bf_spv; /* sectors per volume */
+ cn_t bf_mftcn; /* $MFT cluster number */
+ cn_t bf_mftmirrcn; /* $MFTMirr cn */
+ uint8_t bf_mftrecsz; /* MFT record size (clust) */
/* 0xF6 inducates 1/4 */
- u_int32_t bf_ibsz; /* index buffer size */
- u_int32_t bf_volsn; /* volume ser. num. */
+ uint32_t bf_ibsz; /* index buffer size */
+ uint32_t bf_volsn; /* volume ser. num. */
};
#define NTFS_SYSNODESNUM 0x0B
struct ntfsmount {
struct mount *ntm_mountp; /* filesystem vfs structure */
- struct bootfile ntm_bootfile;
+ struct bootfile ntm_bootfile;
struct g_consumer *ntm_cp;
struct bufobj *ntm_bo;
struct vnode *ntm_devvp; /* block device mounted vnode */
struct vnode *ntm_sysvn[NTFS_SYSNODESNUM];
- u_int32_t ntm_bpmftrec;
- uid_t ntm_uid;
- gid_t ntm_gid;
- mode_t ntm_mode;
+ uint32_t ntm_bpmftrec;
+ uid_t ntm_uid;
+ gid_t ntm_gid;
+ mode_t ntm_mode;
uint64_t ntm_flag;
cn_t ntm_cfree;
struct ntvattrdef *ntm_ad;
int ntm_adnum;
- wchar * ntm_82u; /* 8bit to Unicode */
- char ** ntm_u28; /* Unicode to 8 bit */
+ wchar * ntm_82u; /* 8bit to Unicode */
+ char ** ntm_u28; /* Unicode to 8 bit */
void * ntm_ic_l2u; /* Local to Unicode (iconv) */
void * ntm_ic_u2l; /* Unicode to Local (iconv) */
- u_int8_t ntm_multiplier; /* NTFS blockno to DEV_BSIZE sectorno */
+ uint8_t ntm_multiplier; /* NTFS blockno to DEV_BSIZE sectorno */
};
-#define ntm_mftcn ntm_bootfile.bf_mftcn
-#define ntm_mftmirrcn ntm_bootfile.bf_mftmirrcn
+#define ntm_mftcn ntm_bootfile.bf_mftcn
+#define ntm_mftmirrcn ntm_bootfile.bf_mftmirrcn
#define ntm_mftrecsz ntm_bootfile.bf_mftrecsz
#define ntm_spc ntm_bootfile.bf_spc
#define ntm_bps ntm_bootfile.bf_bps
@@ -272,17 +272,17 @@
#define NTFS_NEXTREC(s, type) ((type)(((caddr_t) s) + (s)->reclen))
/* Convert mount ptr to ntfsmount ptr. */
-#define VFSTONTFS(mp) ((struct ntfsmount *)((mp)->mnt_data))
-#define VTONT(v) FTONT(VTOF(v))
+#define VFSTONTFS(mp) ((struct ntfsmount *)((mp)->mnt_data))
+#define VTONT(v) FTONT(VTOF(v))
#define VTOF(v) ((struct fnode *)((v)->v_data))
#define FTOV(f) ((f)->f_vp)
#define FTONT(f) ((f)->f_ip)
-#define ntfs_cntobn(cn) (daddr_t)((cn) * (ntmp->ntm_spc))
-#define ntfs_cntob(cn) (off_t)((cn) * (ntmp)->ntm_spc * (ntmp)->ntm_bps)
-#define ntfs_btocn(off) (cn_t)((off) / ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
-#define ntfs_btocl(off) (cn_t)((off + ntfs_cntob(1) - 1) / ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
-#define ntfs_btocnoff(off) (off_t)((off) % ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
-#define ntfs_bntob(bn) (daddr_t)((bn) * (ntmp)->ntm_bps)
+#define ntfs_cntobn(cn) (daddr_t)((cn) * (ntmp->ntm_spc))
+#define ntfs_cntob(cn) (off_t)((cn) * (ntmp)->ntm_spc * (ntmp)->ntm_bps)
+#define ntfs_btocn(off) (cn_t)((off) / ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
+#define ntfs_btocl(off) (cn_t)((off + ntfs_cntob(1) - 1) / ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
+#define ntfs_btocnoff(off) (off_t)((off) % ((ntmp)->ntm_spc * (ntmp)->ntm_bps))
+#define ntfs_bntob(bn) (daddr_t)((bn) * (ntmp)->ntm_bps)
#define ntfs_bpbl (daddr_t)((ntmp)->ntm_bps)
@@ -294,15 +294,15 @@
#endif
#if defined(NTFS_DEBUG)
-#define dprintf(a) printf a
+#define dprintf(a) printf a
#if NTFS_DEBUG > 1
-#define ddprintf(a) printf a
+#define ddprintf(a) printf a
#else
-#define ddprintf(a) (void)0
+#define ddprintf(a) (void)0
#endif
#else
-#define dprintf(a) (void)0
-#define ddprintf(a) (void)0
+#define dprintf(a) (void)0
+#define ddprintf(a) (void)0
#endif
extern struct vop_vector ntfs_vnodeops;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ntfs/ntfs_subr.c
--- a/head/sys/fs/ntfs/ntfs_subr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ntfs/ntfs_subr.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/ntfs/ntfs_subr.c 229407 2012-01-03 19:09:01Z pfg $
+ * $FreeBSD: head/sys/fs/ntfs/ntfs_subr.c 238315 2012-07-10 00:01:00Z attilio $
*/
#include <sys/param.h>
@@ -1353,174 +1353,6 @@
}
/*
- * This is one of write routine.
- */
-int
-ntfs_writeattr_plain(
- struct ntfsmount * ntmp,
- struct ntnode * ip,
- u_int32_t attrnum,
- char *attrname,
- off_t roff,
- size_t rsize,
- void *rdata,
- size_t * initp,
- struct uio *uio)
-{
- size_t init;
- int error = 0;
- off_t off = roff, left = rsize, towrite;
- caddr_t data = rdata;
- struct ntvattr *vap;
- *initp = 0;
-
- while (left) {
- error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname,
- ntfs_btocn(off), &vap);
- if (error)
- return (error);
- towrite = MIN(left, ntfs_cntob(vap->va_vcnend + 1) - off);
- ddprintf(("ntfs_writeattr_plain: o: %d, s: %d (%d - %d)\n",
- (u_int32_t) off, (u_int32_t) towrite,
- (u_int32_t) vap->va_vcnstart,
- (u_int32_t) vap->va_vcnend));
- error = ntfs_writentvattr_plain(ntmp, ip, vap,
- off - ntfs_cntob(vap->va_vcnstart),
- towrite, data, &init, uio);
- if (error) {
- printf("ntfs_writeattr_plain: " \
- "ntfs_writentvattr_plain failed: o: %d, s: %d\n",
- (u_int32_t) off, (u_int32_t) towrite);
- printf("ntfs_writeattr_plain: attrib: %d - %d\n",
- (u_int32_t) vap->va_vcnstart,
- (u_int32_t) vap->va_vcnend);
- ntfs_ntvattrrele(vap);
- break;
- }
- ntfs_ntvattrrele(vap);
- left -= towrite;
- off += towrite;
- data = data + towrite;
- *initp += init;
- }
-
- return (error);
-}
-
-/*
- * This is one of write routine.
- *
- * ntnode should be locked.
- */
-int
-ntfs_writentvattr_plain(
- struct ntfsmount * ntmp,
- struct ntnode * ip,
- struct ntvattr * vap,
- off_t roff,
- size_t rsize,
- void *rdata,
- size_t * initp,
- struct uio *uio)
-{
- int error = 0;
- off_t off;
- int cnt;
- cn_t ccn, ccl, cn, left, cl;
- caddr_t data = rdata;
- struct buf *bp;
- size_t tocopy;
-
- *initp = 0;
-
- if ((vap->va_flag & NTFS_AF_INRUN) == 0) {
- printf("ntfs_writevattr_plain: CAN'T WRITE RES. ATTRIBUTE\n");
- return ENOTTY;
- }
-
- ddprintf(("ntfs_writentvattr_plain: data in run: %ld chains\n",
- vap->va_vruncnt));
-
- off = roff;
- left = rsize;
- ccl = 0;
- ccn = 0;
- cnt = 0;
- for (; left && (cnt < vap->va_vruncnt); cnt++) {
- ccn = vap->va_vruncn[cnt];
- ccl = vap->va_vruncl[cnt];
-
- ddprintf(("ntfs_writentvattr_plain: " \
- "left %d, cn: 0x%x, cl: %d, off: %d\n", \
- (u_int32_t) left, (u_int32_t) ccn, \
- (u_int32_t) ccl, (u_int32_t) off));
-
- if (ntfs_cntob(ccl) < off) {
- off -= ntfs_cntob(ccl);
- cnt++;
- continue;
- }
- if (!ccn && ip->i_number != NTFS_BOOTINO)
- continue; /* XXX */
-
- ccl -= ntfs_btocn(off);
- cn = ccn + ntfs_btocn(off);
- off = ntfs_btocnoff(off);
-
- while (left && ccl) {
- /*
- * Always read and write single clusters at a time -
- * we need to avoid requesting differently-sized
- * blocks at the same disk offsets to avoid
- * confusing the buffer cache.
- */
- tocopy = MIN(left, ntfs_cntob(1) - off);
- cl = ntfs_btocl(tocopy + off);
- KASSERT(cl == 1 && tocopy <= ntfs_cntob(1),
- ("single cluster limit mistake"));
- ddprintf(("ntfs_writentvattr_plain: write: " \
- "cn: 0x%x cl: %d, off: %d len: %d, left: %d\n",
- (u_int32_t) cn, (u_int32_t) cl,
- (u_int32_t) off, (u_int32_t) tocopy,
- (u_int32_t) left));
- if ((off == 0) && (tocopy == ntfs_cntob(cl)))
- {
- bp = getblk(ntmp->ntm_devvp, ntfs_cntobn(cn)
- * ntmp->ntm_multiplier,
- ntfs_cntob(cl), 0, 0, 0);
- clrbuf(bp);
- } else {
- error = bread(ntmp->ntm_devvp, ntfs_cntobn(cn)
- * ntmp->ntm_multiplier,
- ntfs_cntob(cl), NOCRED, &bp);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
- if (uio)
- uiomove(bp->b_data + off, tocopy, uio);
- else
- memcpy(bp->b_data + off, data, tocopy);
- bawrite(bp);
- data = data + tocopy;
- *initp += tocopy;
- off = 0;
- left -= tocopy;
- cn += cl;
- ccl -= cl;
- }
- }
-
- if (left) {
- printf("ntfs_writentvattr_plain: POSSIBLE RUN ERROR\n");
- error = EINVAL;
- }
-
- return (error);
-}
-
-/*
* This is one of read routines.
*
* ntnode should be locked.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ntfs/ntfs_subr.h
--- a/head/sys/fs/ntfs/ntfs_subr.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ntfs/ntfs_subr.h Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/ntfs/ntfs_subr.h 228864 2011-12-24 15:49:52Z kevlo $
+ * $FreeBSD: head/sys/fs/ntfs/ntfs_subr.h 238315 2012-07-10 00:01:00Z attilio $
*/
#define VA_LOADED 0x0001
@@ -99,8 +99,6 @@
void ntfs_ntrele(struct ntnode *);
void ntfs_ntput(struct ntnode *);
int ntfs_loadntnode( struct ntfsmount *, struct ntnode * );
-int ntfs_writentvattr_plain(struct ntfsmount *, struct ntnode *, struct ntvattr *, off_t, size_t, void *, size_t *, struct uio *);
-int ntfs_writeattr_plain(struct ntfsmount *, struct ntnode *, u_int32_t, char *, off_t, size_t, void *, size_t *, struct uio *);
void ntfs_toupper_init(void);
void ntfs_toupper_destroy(void);
int ntfs_toupper_use(struct mount *, struct ntfsmount *);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ntfs/ntfs_vfsops.c
--- a/head/sys/fs/ntfs/ntfs_vfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ntfs/ntfs_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/ntfs/ntfs_vfsops.c 232483 2012-03-04 09:38:20Z kevlo $
+ * $FreeBSD: head/sys/fs/ntfs/ntfs_vfsops.c 238320 2012-07-10 00:23:25Z attilio $
*/
@@ -152,7 +152,6 @@
ntfs_mount(struct mount *mp)
{
int err = 0, error;
- accmode_t accmode;
struct vnode *devvp;
struct nameidata ndp;
struct thread *td;
@@ -162,6 +161,11 @@
if (vfs_filteropt(mp->mnt_optnew, ntfs_opts))
return (EINVAL);
+ /* Force mount as read-only. */
+ MNT_ILOCK(mp);
+ mp->mnt_flag |= MNT_RDONLY;
+ MNT_IUNLOCK(mp);
+
from = vfs_getopts(mp->mnt_optnew, "from", &error);
if (error)
return (error);
@@ -173,11 +177,10 @@
if (mp->mnt_flag & MNT_UPDATE) {
if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
/* Process export requests in vfs_mount.c */
- goto success;
+ return (0);
} else {
printf("ntfs_mount(): MNT_UPDATE not supported\n");
- err = EINVAL;
- goto error_1;
+ return (EINVAL);
}
}
@@ -187,10 +190,8 @@
*/
NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, from, td);
err = namei(&ndp);
- if (err) {
- /* can't get devvp!*/
- goto error_1;
- }
+ if (err)
+ return (err);
NDFREE(&ndp, NDF_ONLY_PNBUF);
devvp = ndp.ni_vp;
@@ -203,10 +204,7 @@
* If mount by non-root, then verify that user has necessary
* permissions on the device.
*/
- accmode = VREAD;
- if ((mp->mnt_flag & MNT_RDONLY) == 0)
- accmode |= VWRITE;
- err = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
+ err = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
if (err)
err = priv_check(td, PRIV_VFS_MOUNT_PERM);
if (err) {
@@ -214,52 +212,23 @@
return (err);
}
- if (mp->mnt_flag & MNT_UPDATE) {
-#if 0
- /*
- ********************
- * UPDATE
- ********************
- */
- if (devvp != ntmp->um_devvp)
- err = EINVAL; /* needs translation */
- vput(devvp);
- if (err)
- return (err);
-#endif
- } else {
- /*
- ********************
- * NEW MOUNT
- ********************
- */
+ /*
+ * Since this is a new mount, we want the names for the device and
+ * the mount point copied in. If an error occurs, the mountpoint is
+ * discarded by the upper level code. Note that vfs_mount() handles
+ * copying the mountpoint f_mntonname for us, so we don't have to do
+ * it here unless we want to set it to something other than "path"
+ * for some reason.
+ */
- /*
- * Since this is a new mount, we want the names for
- * the device and the mount point copied in. If an
- * error occurs, the mountpoint is discarded by the
- * upper level code. Note that vfs_mount() handles
- * copying the mountpoint f_mntonname for us, so we
- * don't have to do it here unless we want to set it
- * to something other than "path" for some rason.
- */
- /* Save "mounted from" info for mount point (NULL pad)*/
+ err = ntfs_mountfs(devvp, mp, td);
+ if (err == 0) {
+
+ /* Save "mounted from" info for mount point. */
vfs_mountedfrom(mp, from);
-
- err = ntfs_mountfs(devvp, mp, td);
- }
- if (err) {
+ } else
vrele(devvp);
- return (err);
- }
-
- goto success;
-
-error_1: /* no state to back out*/
- /* XXX: missing NDFREE(&ndp, ...) */
-
-success:
return (err);
}
@@ -275,13 +244,12 @@
struct buf *bp;
struct ntfsmount *ntmp;
struct cdev *dev = devvp->v_rdev;
- int error, ronly, i, v;
+ int error, i, v;
struct vnode *vp;
struct g_consumer *cp;
struct g_provider *pp;
char *cs_ntfs, *cs_local;
- ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
DROP_GIANT();
g_topology_lock();
@@ -296,7 +264,7 @@
if ((pp != NULL) && ((pp->acr | pp->acw | pp->ace ) != 0))
error = EPERM;
else
- error = g_vfs_open(devvp, &cp, "ntfs", ronly ? 0 : 1);
+ error = g_vfs_open(devvp, &cp, "ntfs", 0);
g_topology_unlock();
PICKUP_GIANT();
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/ntfs/ntfs_vnops.c
--- a/head/sys/fs/ntfs/ntfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/ntfs/ntfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/ntfs/ntfs_vnops.c 228864 2011-12-24 15:49:52Z kevlo $
+ * $FreeBSD: head/sys/fs/ntfs/ntfs_vnops.c 238315 2012-07-10 00:01:00Z attilio $
*
*/
@@ -67,7 +67,6 @@
#include <sys/unistd.h> /* for pathconf(2) constants */
static vop_read_t ntfs_read;
-static vop_write_t ntfs_write;
static vop_getattr_t ntfs_getattr;
static vop_inactive_t ntfs_inactive;
static vop_reclaim_t ntfs_reclaim;
@@ -78,7 +77,6 @@
static vop_close_t ntfs_close;
static vop_readdir_t ntfs_readdir;
static vop_cachedlookup_t ntfs_lookup;
-static vop_fsync_t ntfs_fsync;
static vop_pathconf_t ntfs_pathconf;
static vop_vptofh_t ntfs_vptofh;
@@ -272,6 +270,7 @@
register struct fnode *fp = VTOF(vp);
register struct ntnode *ip = FTONT(fp);
struct ntfsmount *ntmp = ip->i_mp;
+ u_int32_t toread;
int error;
dprintf(("ntfs_strategy: offset: %d, blkno: %d, lblkno: %d\n",
@@ -281,99 +280,33 @@
dprintf(("strategy: bcount: %d flags: 0x%x\n",
(u_int32_t)bp->b_bcount,bp->b_flags));
- if (bp->b_iocmd == BIO_READ) {
- u_int32_t toread;
+ KASSERT(bp->b_iocmd == BIO_READ, ("Invalid buffer\n"));
- if (ntfs_cntob(bp->b_blkno) >= fp->f_size) {
- clrbuf(bp);
- error = 0;
- } else {
- toread = MIN(bp->b_bcount,
- fp->f_size-ntfs_cntob(bp->b_blkno));
- dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
- toread,(u_int32_t)fp->f_size));
+ if (ntfs_cntob(bp->b_blkno) >= fp->f_size) {
+ clrbuf(bp);
+ error = 0;
+ } else {
+ toread = MIN(bp->b_bcount,
+ fp->f_size-ntfs_cntob(bp->b_blkno));
+ dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
+ toread,(u_int32_t)fp->f_size));
- error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
- fp->f_attrname, ntfs_cntob(bp->b_blkno),
- toread, bp->b_data, NULL);
+ error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
+ fp->f_attrname, ntfs_cntob(bp->b_blkno),
+ toread, bp->b_data, NULL);
- if (error) {
- printf("ntfs_strategy: ntfs_readattr failed\n");
- bp->b_error = error;
- bp->b_ioflags |= BIO_ERROR;
- }
+ if (error) {
+ printf("ntfs_strategy: ntfs_readattr failed\n");
+ bp->b_error = error;
+ bp->b_ioflags |= BIO_ERROR;
+ }
- bzero(bp->b_data + toread, bp->b_bcount - toread);
- }
- } else {
- size_t tmp;
- u_int32_t towrite;
-
- if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) {
- printf("ntfs_strategy: CAN'T EXTEND FILE\n");
- bp->b_error = error = EFBIG;
- bp->b_ioflags |= BIO_ERROR;
- } else {
- towrite = MIN(bp->b_bcount,
- fp->f_size-ntfs_cntob(bp->b_blkno));
- dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n",
- towrite,(u_int32_t)fp->f_size));
-
- error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,
- fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite,
- bp->b_data, &tmp, NULL);
-
- if (error) {
- printf("ntfs_strategy: ntfs_writeattr fail\n");
- bp->b_error = error;
- bp->b_ioflags |= BIO_ERROR;
- }
- }
+ bzero(bp->b_data + toread, bp->b_bcount - toread);
}
bufdone(bp);
return (0);
}
-static int
-ntfs_write(ap)
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
-{
- register struct vnode *vp = ap->a_vp;
- register struct fnode *fp = VTOF(vp);
- register struct ntnode *ip = FTONT(fp);
- struct uio *uio = ap->a_uio;
- struct ntfsmount *ntmp = ip->i_mp;
- u_int64_t towrite;
- size_t written;
- int error;
-
- dprintf(("ntfs_write: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
- dprintf(("ntfs_write: filesize: %d",(u_int32_t)fp->f_size));
-
- if (uio->uio_resid + uio->uio_offset > fp->f_size) {
- printf("ntfs_write: CAN'T WRITE BEYOND END OF FILE\n");
- return (EFBIG);
- }
-
- towrite = MIN(uio->uio_resid, fp->f_size - uio->uio_offset);
-
- dprintf((", towrite: %d\n",(u_int32_t)towrite));
-
- error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,
- fp->f_attrname, uio->uio_offset, towrite, NULL, &written, uio);
-#ifdef NTFS_DEBUG
- if (error)
- printf("ntfs_write: ntfs_writeattr failed: %d\n", error);
-#endif
-
- return (error);
-}
-
int
ntfs_access(ap)
struct vop_access_args /* {
@@ -390,7 +323,7 @@
dprintf(("ntfs_access: %d\n",ip->i_number));
/*
- * Disallow write attempts on read-only filesystems;
+ * Disallow write attempts as we assume read-only filesystems;
* unless the file is a socket, fifo, or a block or
* character device resident on the filesystem.
*/
@@ -399,8 +332,8 @@
case VDIR:
case VLNK:
case VREG:
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- return (EROFS);
+ return (EROFS);
+ default:
break;
}
}
@@ -493,8 +426,13 @@
/* Simulate . in every dir except ROOT */
if( ip->i_number != NTFS_ROOTINO ) {
- struct dirent dot = { NTFS_ROOTINO,
- sizeof(struct dirent), DT_DIR, 1, "." };
+ struct dirent dot = {
+ .d_fileno = NTFS_ROOTINO,
+ .d_reclen = sizeof(struct dirent),
+ .d_type = DT_DIR,
+ .d_namlen = 1,
+ .d_name = "."
+ };
if( uio->uio_offset < sizeof(struct dirent) ) {
dot.d_fileno = ip->i_number;
@@ -508,8 +446,13 @@
/* Simulate .. in every dir including ROOT */
if( uio->uio_offset < 2 * sizeof(struct dirent) ) {
- struct dirent dotdot = { NTFS_ROOTINO,
- sizeof(struct dirent), DT_DIR, 2, ".." };
+ struct dirent dotdot = {
+ .d_fileno = NTFS_ROOTINO,
+ .d_reclen = sizeof(struct dirent),
+ .d_type = DT_DIR,
+ .d_namlen = 2,
+ .d_name = ".."
+ };
error = uiomove((char *)&dotdot,sizeof(struct dirent),uio);
if(error)
@@ -620,7 +563,6 @@
return (error);
if ((cnp->cn_flags & ISLASTCN) &&
- (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
@@ -669,24 +611,6 @@
}
/*
- * Flush the blocks of a file to disk.
- *
- * This function is worthless for vnodes that represent directories. Maybe we
- * could just do a sync if they try an fsync on a directory file.
- */
-static int
-ntfs_fsync(ap)
- struct vop_fsync_args /* {
- struct vnode *a_vp;
- struct ucred *a_cred;
- int a_waitfor;
- struct thread *a_td;
- } */ *ap;
-{
- return (0);
-}
-
-/*
* Return POSIX pathconf information applicable to NTFS filesystem
*/
int
@@ -746,7 +670,6 @@
.vop_bmap = ntfs_bmap,
.vop_cachedlookup = ntfs_lookup,
.vop_close = ntfs_close,
- .vop_fsync = ntfs_fsync,
.vop_getattr = ntfs_getattr,
.vop_inactive = ntfs_inactive,
.vop_lookup = vfs_cache_lookup,
@@ -756,6 +679,5 @@
.vop_readdir = ntfs_readdir,
.vop_reclaim = ntfs_reclaim,
.vop_strategy = ntfs_strategy,
- .vop_write = ntfs_write,
.vop_vptofh = ntfs_vptofh,
};
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/nullfs/null_vnops.c
--- a/head/sys/fs/nullfs/null_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/nullfs/null_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
* ...and...
* @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
*
- * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 232303 2012-02-29 15:15:36Z kib $
+ * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 234607 2012-04-23 14:10:34Z trasz $
*/
/*
@@ -678,7 +678,6 @@
null_inactive(struct vop_inactive_args *ap)
{
struct vnode *vp = ap->a_vp;
- struct thread *td = ap->a_td;
vp->v_object = NULL;
@@ -686,7 +685,7 @@
* If this is the last reference, then free up the vnode
* so as not to tie up the lower vnodes.
*/
- vrecycle(vp, td);
+ vrecycle(vp);
return (0);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/portalfs/portal_vnops.c
--- a/head/sys/fs/portalfs/portal_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/portalfs/portal_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
*
* @(#)portal_vnops.c 8.14 (Berkeley) 5/21/95
*
- * $FreeBSD: head/sys/fs/portalfs/portal_vnops.c 226497 2011-10-18 07:31:49Z des $
+ * $FreeBSD: head/sys/fs/portalfs/portal_vnops.c 238697 2012-07-22 15:40:31Z kevlo $
*/
/*
@@ -110,7 +110,7 @@
char *pname = cnp->cn_nameptr;
struct portalnode *pt;
int error;
- struct vnode *fvp = 0;
+ struct vnode *fvp = NULL;
char *path;
int size;
@@ -217,14 +217,14 @@
struct thread *a_td;
} */ *ap;
{
- struct socket *so = 0;
+ struct socket *so = NULL;
struct portalnode *pt;
struct thread *td = ap->a_td;
struct vnode *vp = ap->a_vp;
struct uio auio;
struct iovec aiov[2];
int res;
- struct mbuf *cm = 0;
+ struct mbuf *cm = NULL;
struct cmsghdr *cmsg;
int newfds;
int *ip;
@@ -356,7 +356,7 @@
len = auio.uio_resid = sizeof(int);
do {
- struct mbuf *m = 0;
+ struct mbuf *m = NULL;
int flags = MSG_WAITALL;
error = soreceive(so, (struct sockaddr **) 0, &auio,
&m, &cm, &flags);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/smbfs/smbfs_node.c
--- a/head/sys/fs/smbfs/smbfs_node.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/smbfs/smbfs_node.c Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/smbfs/smbfs_node.c 227293 2011-11-07 06:44:47Z ed $
+ * $FreeBSD: head/sys/fs/smbfs/smbfs_node.c 238539 2012-07-16 22:07:29Z brueffer $
*/
#include <sys/param.h>
#include <sys/systm.h>
@@ -223,19 +223,16 @@
if (fap == NULL)
return ENOENT;
- np = malloc(sizeof *np, M_SMBNODE, M_WAITOK);
error = getnewvnode("smbfs", mp, &smbfs_vnodeops, &vp);
- if (error) {
- free(np, M_SMBNODE);
- return error;
- }
+ if (error != 0)
+ return (error);
error = insmntque(vp, mp); /* XXX: Too early for mpsafe fs */
- if (error != 0) {
- free(np, M_SMBNODE);
+ if (error != 0)
return (error);
- }
+
+ np = malloc(sizeof *np, M_SMBNODE, M_WAITOK | M_ZERO);
+
vp->v_type = fap->fa_attr & SMB_FA_DIR ? VDIR : VREG;
- bzero(np, sizeof(*np));
vp->v_data = np;
np->n_vnode = vp;
np->n_mount = VFSTOSMBFS(mp);
@@ -373,7 +370,7 @@
smbfs_attr_cacheremove(vp);
}
if (np->n_flag & NGONE)
- vrecycle(vp, td);
+ vrecycle(vp);
return (0);
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/tmpfs/tmpfs_vnops.c
--- a/head/sys/fs/tmpfs/tmpfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/tmpfs/tmpfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -34,7 +34,7 @@
* tmpfs vnode interface.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 234064 2012-04-09 17:05:18Z attilio $");
+__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 234607 2012-04-23 14:10:34Z trasz $");
#include <sys/param.h>
#include <sys/fcntl.h>
@@ -1577,7 +1577,6 @@
tmpfs_inactive(struct vop_inactive_args *v)
{
struct vnode *vp = v->a_vp;
- struct thread *l = v->a_td;
struct tmpfs_node *node;
@@ -1586,7 +1585,7 @@
node = VP_TO_TMPFS_NODE(vp);
if (node->tn_links == 0)
- vrecycle(vp, l);
+ vrecycle(vp);
return 0;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/udf/udf_vfsops.c
--- a/head/sys/fs/udf/udf_vfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/udf/udf_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/fs/udf/udf_vfsops.c 222167 2011-05-22 01:07:54Z rmacklem $
+ * $FreeBSD: head/sys/fs/udf/udf_vfsops.c 238697 2012-07-22 15:40:31Z kevlo $
*/
/* udf_vfsops.c */
@@ -190,7 +190,7 @@
{
struct vnode *devvp; /* vnode of the mount device */
struct thread *td;
- struct udf_mnt *imp = 0;
+ struct udf_mnt *imp = NULL;
struct vfsoptlist *opts;
char *fspec, *cs_disk, *cs_local;
int error, len, *udf_flags;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/unionfs/union_subr.c
--- a/head/sys/fs/unionfs/union_subr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/unionfs/union_subr.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* Copyright (c) 1994 Jan-Simon Pendry
* Copyright (c) 1994
* The Regents of the University of California. All rights reserved.
- * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
- * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
+ * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006, 2012 Daichi Goto <daichi at freebsd.org>
*
* This code is derived from software contributed to Berkeley by
* Jan-Simon Pendry.
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)union_subr.c 8.20 (Berkeley) 5/20/95
- * $FreeBSD: head/sys/fs/unionfs/union_subr.c 232701 2012-03-08 20:27:20Z jhb $
+ * $FreeBSD: head/sys/fs/unionfs/union_subr.c 235503 2012-05-16 10:44:09Z gleb $
*/
#include <sys/param.h>
@@ -350,19 +350,22 @@
uvp = unp->un_uppervp;
dvp = unp->un_dvp;
unp->un_lowervp = unp->un_uppervp = NULLVP;
-
vp->v_vnlock = &(vp->v_lock);
vp->v_data = NULL;
- lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
+ vp->v_object = NULL;
+ VI_UNLOCK(vp);
+
if (lvp != NULLVP)
- VOP_UNLOCK(lvp, 0);
+ VOP_UNLOCK(lvp, LK_RELEASE);
if (uvp != NULLVP)
- VOP_UNLOCK(uvp, 0);
- vp->v_object = NULL;
+ VOP_UNLOCK(uvp, LK_RELEASE);
if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
unionfs_rem_cached_vnode(unp, dvp);
+ if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
+ panic("the lock for deletion is unacquirable.");
+
if (lvp != NULLVP) {
vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
vrele(lvp);
@@ -550,7 +553,7 @@
cn->cn_flags |= (cnp->cn_flags & SAVESTART);
vref(dvp);
- VOP_UNLOCK(dvp, 0);
+ VOP_UNLOCK(dvp, LK_RELEASE);
if ((error = relookup(dvp, vpp, cn))) {
uma_zfree(namei_zone, cn->cn_pnbuf);
@@ -957,7 +960,7 @@
*vpp = vp;
unionfs_vn_create_on_upper_free_out1:
- VOP_UNLOCK(udvp, 0);
+ VOP_UNLOCK(udvp, LK_RELEASE);
unionfs_vn_create_on_upper_free_out2:
if (cn.cn_flags & HASBUF) {
@@ -1181,7 +1184,7 @@
edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
for (dp = (struct dirent*)buf; !error && dp < edp;
dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
- if (dp->d_type == DT_WHT ||
+ if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
(dp->d_namlen == 1 && dp->d_name[0] == '.') ||
(dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
continue;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/unionfs/union_vfsops.c
--- a/head/sys/fs/unionfs/union_vfsops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/unionfs/union_vfsops.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,8 +1,8 @@
/*-
* Copyright (c) 1994, 1995 The Regents of the University of California.
* Copyright (c) 1994, 1995 Jan-Simon Pendry.
- * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
- * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
+ * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006, 2012 Daichi Goto <daichi at freebsd.org>
* All rights reserved.
*
* This code is derived from software donated to Berkeley by
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95
- * $FreeBSD: head/sys/fs/unionfs/union_vfsops.c 232918 2012-03-13 10:04:13Z kevlo $
+ * $FreeBSD: head/sys/fs/unionfs/union_vfsops.c 234867 2012-05-01 07:46:30Z daichi $
*/
#include <sys/param.h>
@@ -165,7 +165,7 @@
uid = va.va_uid;
gid = va.va_gid;
}
- VOP_UNLOCK(mp->mnt_vnodecovered, 0);
+ VOP_UNLOCK(mp->mnt_vnodecovered, LK_RELEASE);
if (error)
return (error);
@@ -250,7 +250,7 @@
* Save reference
*/
if (below) {
- VOP_UNLOCK(upperrootvp, 0);
+ VOP_UNLOCK(upperrootvp, LK_RELEASE);
vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY);
ump->um_lowervp = upperrootvp;
ump->um_uppervp = lowerrootvp;
@@ -281,7 +281,7 @@
/*
* Unlock the node
*/
- VOP_UNLOCK(ump->um_uppervp, 0);
+ VOP_UNLOCK(ump->um_uppervp, LK_RELEASE);
/*
* Get the unionfs root vnode.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/fs/unionfs/union_vnops.c
--- a/head/sys/fs/unionfs/union_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/fs/unionfs/union_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
* Copyright (c) 1992, 1993, 1994, 1995
* The Regents of the University of California.
- * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
- * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
+ * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006, 2012 Daichi Goto <daichi at freebsd.org>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)union_vnops.c 8.32 (Berkeley) 6/23/95
- * $FreeBSD: head/sys/fs/unionfs/union_vnops.c 226234 2011-10-10 21:32:08Z trasz $
+ * $FreeBSD: head/sys/fs/unionfs/union_vnops.c 234944 2012-05-03 07:22:29Z daichi $
*
*/
@@ -75,21 +75,6 @@
KASSERT(((vp)->v_op == &unionfs_vnodeops), \
("unionfs: it is not unionfs-vnode"))
-/* lockmgr lock <-> reverse table */
-struct lk_lr_table {
- int lock;
- int revlock;
-};
-
-static struct lk_lr_table un_llt[] = {
- {LK_SHARED, LK_RELEASE},
- {LK_EXCLUSIVE, LK_RELEASE},
- {LK_UPGRADE, LK_DOWNGRADE},
- {LK_DOWNGRADE, LK_UPGRADE},
- {0, 0}
-};
-
-
static int
unionfs_lookup(struct vop_cachedlookup_args *ap)
{
@@ -141,7 +126,7 @@
if (udvp != NULLVP) {
dtmpvp = udvp;
if (ldvp != NULLVP)
- VOP_UNLOCK(ldvp, 0);
+ VOP_UNLOCK(ldvp, LK_RELEASE);
}
else
dtmpvp = ldvp;
@@ -149,7 +134,7 @@
error = VOP_LOOKUP(dtmpvp, &vp, cnp);
if (dtmpvp == udvp && ldvp != NULLVP) {
- VOP_UNLOCK(udvp, 0);
+ VOP_UNLOCK(udvp, LK_RELEASE);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
}
@@ -161,10 +146,10 @@
*/
if (nameiop == DELETE || nameiop == RENAME ||
(cnp->cn_lkflags & LK_TYPE_MASK))
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
vrele(vp);
- VOP_UNLOCK(dvp, 0);
+ VOP_UNLOCK(dvp, LK_RELEASE);
*(ap->a_vpp) = dunp->un_dvp;
vref(dunp->un_dvp);
@@ -202,7 +187,7 @@
}
if (nameiop == DELETE || nameiop == RENAME ||
(cnp->cn_lkflags & LK_TYPE_MASK))
- VOP_UNLOCK(uvp, 0);
+ VOP_UNLOCK(uvp, LK_RELEASE);
}
/* check whiteout */
@@ -246,7 +231,7 @@
return (lerror);
}
if (cnp->cn_lkflags & LK_TYPE_MASK)
- VOP_UNLOCK(lvp, 0);
+ VOP_UNLOCK(lvp, LK_RELEASE);
}
}
@@ -281,7 +266,7 @@
goto unionfs_lookup_out;
if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK))
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
if (LK_EXCLUSIVE != VOP_ISLOCKED(vp)) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
lockflag = 1;
@@ -289,7 +274,7 @@
error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
udvp, VTOUNIONFS(vp), cnp, td);
if (lockflag != 0)
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
if (error != 0) {
UNIONFSDEBUG("unionfs_lookup: Unable to create shadow dir.");
if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE)
@@ -386,7 +371,7 @@
if (vp->v_type == VSOCK)
*(ap->a_vpp) = vp;
else {
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp, curthread);
vrele(vp);
@@ -460,7 +445,7 @@
if (vp->v_type == VSOCK)
*(ap->a_vpp) = vp;
else {
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp, curthread);
vrele(vp);
@@ -564,6 +549,7 @@
struct unionfs_node_status *unsp;
struct ucred *cred;
struct thread *td;
+ struct vnode *vp;
struct vnode *ovp;
UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
@@ -571,12 +557,14 @@
KASSERT_UNIONFS_VNODE(ap->a_vp);
locked = 0;
- unp = VTOUNIONFS(ap->a_vp);
+ vp = ap->a_vp;
+ unp = VTOUNIONFS(vp);
cred = ap->a_cred;
td = ap->a_td;
- if (VOP_ISLOCKED(ap->a_vp) != LK_EXCLUSIVE) {
- vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+ if (vn_lock(vp, LK_UPGRADE) != 0)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
locked = 1;
}
unionfs_get_node_status(unp, td, &unsp);
@@ -599,7 +587,7 @@
if (error != 0)
goto unionfs_close_abort;
- ap->a_vp->v_object = ovp->v_object;
+ vp->v_object = ovp->v_object;
if (ovp == unp->un_uppervp) {
unsp->uns_upper_opencnt--;
@@ -610,7 +598,7 @@
unsp->uns_lower_opencnt--;
}
if (unsp->uns_lower_opencnt > 0)
- ap->a_vp->v_object = unp->un_lowervp->v_object;
+ vp->v_object = unp->un_lowervp->v_object;
}
} else
unsp->uns_lower_opencnt--;
@@ -619,7 +607,7 @@
unionfs_tryrem_node_status(unp, unsp);
if (locked != 0)
- VOP_UNLOCK(ap->a_vp, 0);
+ vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
UNIONFS_INTERNAL_DEBUG("unionfs_close: leave (%d)\n", error);
@@ -914,7 +902,7 @@
unionfs_get_node_status(unp, ap->a_td, &unsp);
ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
unionfs_tryrem_node_status(unp, unsp);
- VOP_UNLOCK(ap->a_vp, 0);
+ VOP_UNLOCK(ap->a_vp, LK_RELEASE);
if (ovp == NULLVP)
return (EBADF);
@@ -941,7 +929,7 @@
unionfs_get_node_status(unp, ap->a_td, &unsp);
ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
unionfs_tryrem_node_status(unp, unsp);
- VOP_UNLOCK(ap->a_vp, 0);
+ VOP_UNLOCK(ap->a_vp, LK_RELEASE);
if (ovp == NULLVP)
return (EBADF);
@@ -1001,7 +989,7 @@
ump = NULL;
vp = uvp = lvp = NULLVP;
/* search vnode */
- VOP_UNLOCK(ap->a_vp, 0);
+ VOP_UNLOCK(ap->a_vp, LK_RELEASE);
error = unionfs_relookup(udvp, &vp, cnp, &cn, td,
cnp->cn_nameptr, strlen(cnp->cn_nameptr), DELETE);
if (error != 0 && error != ENOENT) {
@@ -1204,7 +1192,7 @@
if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
goto unionfs_rename_abort;
error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
- VOP_UNLOCK(fvp, 0);
+ VOP_UNLOCK(fvp, LK_RELEASE);
if (error != 0)
goto unionfs_rename_abort;
break;
@@ -1212,7 +1200,7 @@
if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
goto unionfs_rename_abort;
error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
- VOP_UNLOCK(fvp, 0);
+ VOP_UNLOCK(fvp, LK_RELEASE);
if (error != 0)
goto unionfs_rename_abort;
break;
@@ -1269,13 +1257,13 @@
if ((error = vn_lock(fdvp, LK_EXCLUSIVE)) != 0)
goto unionfs_rename_abort;
error = unionfs_relookup_for_delete(fdvp, fcnp, td);
- VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(fdvp, LK_RELEASE);
if (error != 0)
goto unionfs_rename_abort;
/* Locke of tvp is canceled in order to avoid recursive lock. */
if (tvp != NULLVP && tvp != tdvp)
- VOP_UNLOCK(tvp, 0);
+ VOP_UNLOCK(tvp, LK_RELEASE);
error = unionfs_relookup_for_rename(tdvp, tcnp, td);
if (tvp != NULLVP && tvp != tdvp)
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
@@ -1293,11 +1281,11 @@
}
if (ltdvp != NULLVP)
- VOP_UNLOCK(ltdvp, 0);
+ VOP_UNLOCK(ltdvp, LK_RELEASE);
if (tdvp != rtdvp)
vrele(tdvp);
if (ltvp != NULLVP)
- VOP_UNLOCK(ltvp, 0);
+ VOP_UNLOCK(ltvp, LK_RELEASE);
if (tvp != rtvp && tvp != NULLVP) {
if (rtvp == NULLVP)
vput(tvp);
@@ -1371,7 +1359,7 @@
}
if ((error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap)) == 0) {
- VOP_UNLOCK(uvp, 0);
+ VOP_UNLOCK(uvp, LK_RELEASE);
cnp->cn_lkflags = LK_EXCLUSIVE;
error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp, td);
@@ -1427,7 +1415,9 @@
ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
cnp->cn_flags |= DOWHITEOUT;
- error = VOP_RMDIR(udvp, uvp, cnp);
+ error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td);
+ if (!error)
+ error = VOP_RMDIR(udvp, uvp, cnp);
}
else if (lvp != NULLVP)
error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path);
@@ -1467,7 +1457,7 @@
if (udvp != NULLVP) {
error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target);
if (error == 0) {
- VOP_UNLOCK(uvp, 0);
+ VOP_UNLOCK(uvp, LK_RELEASE);
cnp->cn_lkflags = LK_EXCLUSIVE;
error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp, td);
@@ -1487,9 +1477,11 @@
int error;
int eofflag;
int locked;
+ int uio_offset_bk;
struct unionfs_node *unp;
struct unionfs_node_status *unsp;
struct uio *uio;
+ struct vnode *vp;
struct vnode *uvp;
struct vnode *lvp;
struct thread *td;
@@ -1505,17 +1497,42 @@
error = 0;
eofflag = 0;
locked = 0;
- unp = VTOUNIONFS(ap->a_vp);
+ uio_offset_bk = 0;
uio = ap->a_uio;
- uvp = unp->un_uppervp;
- lvp = unp->un_lowervp;
+ uvp = NULLVP;
+ lvp = NULLVP;
td = uio->uio_td;
ncookies_bk = 0;
cookies_bk = NULL;
- if (ap->a_vp->v_type != VDIR)
+ vp = ap->a_vp;
+ if (vp->v_type != VDIR)
return (ENOTDIR);
+ /* Check the open count; unionfs must be opened before readdir. */
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+ if (vn_lock(vp, LK_UPGRADE) != 0)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ locked = 1;
+ }
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL)
+ error = EBADF;
+ else {
+ uvp = unp->un_uppervp;
+ lvp = unp->un_lowervp;
+ unionfs_get_node_status(unp, td, &unsp);
+ if ((uvp != NULLVP && unsp->uns_upper_opencnt <= 0) ||
+ (lvp != NULLVP && unsp->uns_lower_opencnt <= 0)) {
+ unionfs_tryrem_node_status(unp, unsp);
+ error = EBADF;
+ }
+ }
+ if (locked)
+ vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
+ if (error != 0)
+ goto unionfs_readdir_exit;
+
/* check opaque */
if (uvp != NULLVP && lvp != NULLVP) {
if ((error = VOP_GETATTR(uvp, &va, ap->a_cred)) != 0)
@@ -1524,22 +1541,6 @@
lvp = NULLVP;
}
- /* check the open count. unionfs needs to open before readdir. */
- if (VOP_ISLOCKED(ap->a_vp) != LK_EXCLUSIVE) {
- vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY);
- locked = 1;
- }
- unionfs_get_node_status(unp, td, &unsp);
- if ((uvp != NULLVP && unsp->uns_upper_opencnt <= 0) ||
- (lvp != NULLVP && unsp->uns_lower_opencnt <= 0)) {
- unionfs_tryrem_node_status(unp, unsp);
- error = EBADF;
- }
- if (locked == 1)
- vn_lock(ap->a_vp, LK_DOWNGRADE | LK_RETRY);
- if (error != 0)
- goto unionfs_readdir_exit;
-
/* upper only */
if (uvp != NULLVP && lvp == NULLVP) {
error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
@@ -1576,7 +1577,7 @@
unsp->uns_readdir_status = 1;
/*
- * ufs(and other fs) needs size of uio_resid larger than
+ * UFS(and other FS) needs size of uio_resid larger than
* DIRBLKSIZ.
* size of DIRBLKSIZ equals DEV_BSIZE.
* (see: ufs/ufs/ufs_vnops.c ufs_readdir func , ufs/ufs/dir.h)
@@ -1585,7 +1586,7 @@
goto unionfs_readdir_exit;
/*
- * backup cookies
+ * Backup cookies.
* It prepares to readdir in lower.
*/
if (ap->a_ncookies != NULL) {
@@ -1601,6 +1602,11 @@
/* initialize for readdir in lower */
if (unsp->uns_readdir_status == 1) {
unsp->uns_readdir_status = 2;
+ /*
+ * Backup uio_offset. See the comment after the
+ * VOP_READDIR call on the lower layer.
+ */
+ uio_offset_bk = uio->uio_offset;
uio->uio_offset = 0;
}
@@ -1612,6 +1618,19 @@
error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
+ /*
+ * We can't return an uio_offset of 0: this would trigger an
+ * infinite loop, because the next call to unionfs_readdir would
+ * always restart with the upper layer (uio_offset == 0) and
+ * always return some data.
+ *
+ * This happens when the root directory of the lower layer is removed.
+ * (Deleting the root directory of a unionfs should not be permitted,
+ * but the current VFS cannot prevent it.)
+ */
+ if (uio->uio_offset == 0)
+ uio->uio_offset = uio_offset_bk;
+
if (cookies_bk != NULL) {
/* merge cookies */
int size;
@@ -1623,7 +1642,7 @@
pos = newcookies;
memcpy(pos, cookies_bk, ncookies_bk * sizeof(u_long));
- pos += ncookies_bk * sizeof(u_long);
+ pos += ncookies_bk;
memcpy(pos, *(ap->a_cookies), *(ap->a_ncookies) * sizeof(u_long));
free(cookies_bk, M_TEMP);
free(*(ap->a_cookies), M_TEMP);
@@ -1702,7 +1721,7 @@
unionfs_inactive(struct vop_inactive_args *ap)
{
ap->a_vp->v_object = NULL;
- vrecycle(ap->a_vp, ap->a_td);
+ vrecycle(ap->a_vp);
return (0);
}
@@ -1743,18 +1762,66 @@
}
static int
-unionfs_get_llt_revlock(int flags)
+unionfs_islocked(struct vop_islocked_args *ap)
{
- int count;
-
- flags &= LK_TYPE_MASK;
- for (count = 0; un_llt[count].lock != 0; count++) {
- if (flags == un_llt[count].lock) {
- return un_llt[count].revlock;
- }
+ struct unionfs_node *unp;
+
+ KASSERT_UNIONFS_VNODE(ap->a_vp);
+
+ unp = VTOUNIONFS(ap->a_vp);
+ if (unp == NULL)
+ return (vop_stdislocked(ap));
+
+ if (unp->un_uppervp != NULLVP)
+ return (VOP_ISLOCKED(unp->un_uppervp));
+ if (unp->un_lowervp != NULLVP)
+ return (VOP_ISLOCKED(unp->un_lowervp));
+ return (vop_stdislocked(ap));
+}
+
+static int
+unionfs_get_llt_revlock(struct vnode *vp, int flags)
+{
+ int revlock;
+
+ revlock = 0;
+
+ switch (flags & LK_TYPE_MASK) {
+ case LK_SHARED:
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
+ revlock = LK_UPGRADE;
+ else
+ revlock = LK_RELEASE;
+ break;
+ case LK_EXCLUSIVE:
+ case LK_UPGRADE:
+ revlock = LK_RELEASE;
+ break;
+ case LK_DOWNGRADE:
+ revlock = LK_UPGRADE;
+ break;
+ default:
+ break;
}
- return 0;
+ return (revlock);
+}
+
+/*
+ * When an error occurs, roll back the state of an acquired lock
+ * to what it was before the lock operation.
+ * flags: LK_RELEASE or LK_UPGRADE
+ */
+static void
+unionfs_revlock(struct vnode *vp, int flags)
+{
+ if (flags & LK_RELEASE)
+ VOP_UNLOCK(vp, flags);
+ else {
+ /* UPGRADE */
+ if (vn_lock(vp, flags) != 0)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ }
}
static int
@@ -1763,6 +1830,7 @@
int error;
int flags;
int revlock;
+ int interlock;
int uhold;
struct mount *mp;
struct unionfs_mount *ump;
@@ -1774,15 +1842,13 @@
KASSERT_UNIONFS_VNODE(ap->a_vp);
error = 0;
+ interlock = 1;
uhold = 0;
flags = ap->a_flags;
vp = ap->a_vp;
if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
- return (VOP_UNLOCK(vp, flags));
-
- if ((revlock = unionfs_get_llt_revlock(flags)) == 0)
- panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+ return (VOP_UNLOCK(vp, flags | LK_RELEASE));
if ((flags & LK_INTERLOCK) == 0)
VI_LOCK(vp);
@@ -1798,6 +1864,9 @@
lvp = unp->un_lowervp;
uvp = unp->un_uppervp;
+ if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0)
+ panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+
if ((mp->mnt_kern_flag & MNTK_MPSAFE) != 0 &&
(vp->v_iflag & VI_OWEINACT) != 0)
flags |= LK_NOWAIT;
@@ -1811,6 +1880,23 @@
flags |= LK_CANRECURSE;
if (lvp != NULLVP) {
+ if (uvp != NULLVP && flags & LK_UPGRADE) {
+ /* The shared lock is released once so that a deadlock is avoided. */
+ VI_LOCK_FLAGS(uvp, MTX_DUPOK);
+ vholdl(uvp);
+ uhold = 1;
+ VI_UNLOCK(vp);
+ VOP_UNLOCK(uvp, LK_RELEASE | LK_INTERLOCK);
+ VI_LOCK(vp);
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL) {
+ /* vnode is released. */
+ VI_UNLOCK(vp);
+ VOP_UNLOCK(lvp, LK_RELEASE);
+ vdrop(uvp);
+ return (EBUSY);
+ }
+ }
VI_LOCK_FLAGS(lvp, MTX_DUPOK);
flags |= LK_INTERLOCK;
vholdl(lvp);
@@ -1823,19 +1909,28 @@
VI_LOCK(vp);
unp = VTOUNIONFS(vp);
if (unp == NULL) {
+ /* vnode is released. */
VI_UNLOCK(vp);
if (error == 0)
- VOP_UNLOCK(lvp, 0);
+ VOP_UNLOCK(lvp, LK_RELEASE);
vdrop(lvp);
+ if (uhold != 0)
+ vdrop(uvp);
return (vop_stdlock(ap));
}
}
if (error == 0 && uvp != NULLVP) {
+ if (uhold && flags & LK_UPGRADE) {
+ flags &= ~LK_TYPE_MASK;
+ flags |= LK_EXCLUSIVE;
+ }
VI_LOCK_FLAGS(uvp, MTX_DUPOK);
flags |= LK_INTERLOCK;
- vholdl(uvp);
- uhold = 1;
+ if (uhold == 0) {
+ vholdl(uvp);
+ uhold = 1;
+ }
VI_UNLOCK(vp);
ap->a_flags &= ~LK_INTERLOCK;
@@ -1845,30 +1940,27 @@
VI_LOCK(vp);
unp = VTOUNIONFS(vp);
if (unp == NULL) {
+ /* vnode is released. */
VI_UNLOCK(vp);
- if (error == 0) {
- VOP_UNLOCK(uvp, 0);
- if (lvp != NULLVP)
- VOP_UNLOCK(lvp, 0);
+ if (error == 0)
+ VOP_UNLOCK(uvp, LK_RELEASE);
+ vdrop(uvp);
+ if (lvp != NULLVP) {
+ VOP_UNLOCK(lvp, LK_RELEASE);
+ vdrop(lvp);
}
- if (lvp != NULLVP)
- vdrop(lvp);
- vdrop(uvp);
return (vop_stdlock(ap));
}
-
if (error != 0 && lvp != NULLVP) {
+ /* rollback */
VI_UNLOCK(vp);
- if ((revlock & LK_TYPE_MASK) == LK_RELEASE)
- VOP_UNLOCK(lvp, revlock);
- else
- vn_lock(lvp, revlock | LK_RETRY);
- goto unionfs_lock_abort;
+ unionfs_revlock(lvp, revlock);
+ interlock = 0;
}
}
- VI_UNLOCK(vp);
-unionfs_lock_abort:
+ if (interlock)
+ VI_UNLOCK(vp);
if (lvp != NULLVP)
vdrop(lvp);
if (uhold != 0)
@@ -2013,7 +2105,7 @@
unionfs_tryrem_node_status(unp, unsp);
}
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
error = VOP_ADVLOCK(uvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags);
@@ -2022,7 +2114,7 @@
return error;
unionfs_advlock_abort:
- VOP_UNLOCK(vp, 0);
+ VOP_UNLOCK(vp, LK_RELEASE);
UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
@@ -2150,7 +2242,8 @@
error = VOP_OPENEXTATTR(tvp, ap->a_cred, ap->a_td);
if (error == 0) {
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
+ if (vn_lock(vp, LK_UPGRADE) != 0)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (tvp == unp->un_uppervp)
unp->un_flag |= UNIONFS_OPENEXTU;
else
@@ -2186,7 +2279,8 @@
error = VOP_CLOSEEXTATTR(tvp, ap->a_commit, ap->a_cred, ap->a_td);
if (error == 0) {
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
+ if (vn_lock(vp, LK_UPGRADE) != 0)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (tvp == unp->un_uppervp)
unp->un_flag &= ~UNIONFS_OPENEXTU;
else
@@ -2435,6 +2529,7 @@
.vop_getextattr = unionfs_getextattr,
.vop_getwritemount = unionfs_getwritemount,
.vop_inactive = unionfs_inactive,
+ .vop_islocked = unionfs_islocked,
.vop_ioctl = unionfs_ioctl,
.vop_link = unionfs_link,
.vop_listextattr = unionfs_listextattr,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/acpica/acpi_machdep.c
--- a/head/sys/i386/acpica/acpi_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/acpica/acpi_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/i386/acpica/acpi_machdep.c 235556 2012-05-17 17:58:53Z jhb $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -44,8 +44,6 @@
#include <machine/nexusvar.h>
-SYSCTL_DECL(_debug_acpi);
-
uint32_t acpi_resume_beep;
TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/acpica/acpi_wakecode.S
--- a/head/sys/i386/acpica/acpi_wakecode.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/acpica/acpi_wakecode.S Wed Jul 25 16:40:53 2012 +0300
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
- * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
+ * Copyright (c) 2001-2012 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008-2012 Jung-uk Kim <jkim at FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,11 +26,13 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/acpica/acpi_wakecode.S 237027 2012-06-13 21:03:01Z jkim $
*/
#include <machine/asmacros.h>
+#include <machine/ppireg.h>
#include <machine/specialreg.h>
+#include <machine/timerreg.h>
#include "assym.s"
@@ -39,221 +43,166 @@
* Depending on the previous sleep state, we may need to initialize more
* of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
*/
- .align 4
+
+ .data /* So we can modify it */
+
+ ALIGN_TEXT
.code16
-wakeup_16:
- nop
- cli
- cld
-
+wakeup_start:
/*
* Set up segment registers for real mode, a small stack for
* any calls we make, and clear any flags.
*/
- movw %cs,%ax
- movw %ax,%ds
- movw %ax,%ss
- movw $PAGE_SIZE,%sp
- pushl $0
- popfl
+ cli /* make sure no interrupts */
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
+ movw $PAGE_SIZE, %sp
+ xorw %ax, %ax
+ pushw %ax
+ popfw
/* To debug resume hangs, beep the speaker if the user requested. */
- cmpl $1,resume_beep
- jne nobeep
- movb $0xc0,%al
- outb %al,$0x42
- movb $0x04,%al
- outb %al,$0x42
- inb $0x61,%al
- orb $0x3,%al
- outb %al,$0x61
-nobeep:
+ testb $~0, resume_beep - wakeup_start
+ jz 1f
+ movb $0, resume_beep - wakeup_start
+
+ /* Set PIC timer2 to beep. */
+ movb $(TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT), %al
+ outb %al, $TIMER_MODE
+
+ /* Turn on speaker. */
+ inb $IO_PPI, %al
+ orb $PIT_SPKR, %al
+ outb %al, $IO_PPI
+
+ /* Set frequency. */
+ movw $0x4c0, %ax
+ outb %al, $TIMER_CNTR2
+ shrw $8, %ax
+ outb %al, $TIMER_CNTR2
+1:
/* Re-initialize video BIOS if the reset_video tunable is set. */
- cmpl $1,reset_video
- jne nobiosreset
- lcall $0xc000,$3
+ testb $~0, reset_video - wakeup_start
+ jz 1f
+ movb $0, reset_video - wakeup_start
+ lcall $0xc000, $3
- /*
- * Set up segment registers for real mode again in case the
- * previous BIOS call clobbers them.
- */
- movw %cs,%ax
- movw %ax,%ds
- movw %ax,%ss
-nobiosreset:
+ /* When we reach here, int 0x10 should be ready. Hide cursor. */
+ movb $0x01, %ah
+ movb $0x20, %ch
+ int $0x10
- /* Load GDT for real mode. Use 32 bit prefix for addresses >16 MB. */
- lgdtl physical_gdt
-
- /* Restore CR2, CR3 and CR4 */
- movl previous_cr2,%eax
- movl %eax,%cr2
- movl previous_cr3,%eax
- movl %eax,%cr3
- movl previous_cr4,%eax
- movl %eax,%cr4
-
- /* Transfer some values to protected mode with an inline stack */
-#define NVALUES 9
-#define TRANSFER_STACK32(val, idx) \
- movl val,%eax; \
- movl %eax,wakeup_32stack+(idx+1)+(idx*4)
-
- TRANSFER_STACK32(previous_ss, (NVALUES - 9))
- TRANSFER_STACK32(previous_fs, (NVALUES - 8))
- TRANSFER_STACK32(previous_ds, (NVALUES - 7))
- TRANSFER_STACK32(physical_gdt+2, (NVALUES - 6))
- TRANSFER_STACK32(where_to_recover, (NVALUES - 5))
- TRANSFER_STACK32(previous_idt+2, (NVALUES - 4))
- TRANSFER_STACK32(previous_ldt, (NVALUES - 3))
- TRANSFER_STACK32(previous_gdt+2, (NVALUES - 2))
- TRANSFER_STACK32(previous_tr, (NVALUES - 1))
- TRANSFER_STACK32(previous_cr0, (NVALUES - 0))
-
- mov physical_esp,%esi /* to be used in 32bit code */
-
- /* Enable protected mode */
- movl %cr0,%eax
- orl $(CR0_PE),%eax
- movl %eax,%cr0
-
-wakeup_sw32:
- /* Switch to protected mode by intersegmental jump */
- ljmpl $KCSEL,$0x12345678 /* Code location, to be replaced */
-
- /*
- * Now switched to protected mode without paging enabled.
- * %esi: KERNEL stack pointer (physical address)
- */
- .code32
-wakeup_32:
- nop
-
- /* Set up segment registers for protected mode */
- movw $KDSEL,%ax /* KDSEL to segment registers */
- movw %ax,%ds
- movw %ax,%es
- movw %ax,%gs
- movw %ax,%ss
- movw $KPSEL,%ax /* KPSEL to %fs */
- movw %ax,%fs
- movl %esi,%esp /* physical address stack pointer */
-
-wakeup_32stack:
- /* Operands are overwritten in 16 bit code by TRANSFER_STACK32 macro */
- pushl $0xabcdef09 /* ss + dummy */
- pushl $0xabcdef08 /* fs + gs */
- pushl $0xabcdef07 /* ds + es */
- pushl $0xabcdef06 /* gdt:base (physical address) */
- pushl $0xabcdef05 /* recover address */
- pushl $0xabcdef04 /* idt:base */
- pushl $0xabcdef03 /* ldt + idt:limit */
- pushl $0xabcdef02 /* gdt:base */
- pushl $0xabcdef01 /* TR + gdt:limit */
- pushl $0xabcdef00 /* CR0 */
-
- movl %esp,%ebp
-#define CR0_REGISTER 0(%ebp)
-#define TASK_REGISTER 4(%ebp)
-#define PREVIOUS_GDT 6(%ebp)
-#define PREVIOUS_LDT 12(%ebp)
-#define PREVIOUS_IDT 14(%ebp)
-#define RECOVER_ADDR 20(%ebp)
-#define PHYSICAL_GDT_BASE 24(%ebp)
-#define PREVIOUS_DS 28(%ebp)
-#define PREVIOUS_ES 30(%ebp)
-#define PREVIOUS_FS 32(%ebp)
-#define PREVIOUS_GS 34(%ebp)
-#define PREVIOUS_SS 36(%ebp)
-
- /* Fixup TSS type field */
-#define TSS_TYPEFIX_MASK 0xf9
- xorl %esi,%esi
- movl PHYSICAL_GDT_BASE,%ebx
- movw TASK_REGISTER,%si
- leal (%ebx,%esi),%eax /* get TSS segment descriptor */
- andb $TSS_TYPEFIX_MASK,5(%eax)
-
- /* Prepare to return to sleep/wakeup code point */
- lgdtl PREVIOUS_GDT
- lidtl PREVIOUS_IDT
-
- /* Pack values from the GDT to be loaded into segment registers. */
- movl PREVIOUS_DS,%ebx
- movl PREVIOUS_FS,%ecx
- movl PREVIOUS_SS,%edx
- movw TASK_REGISTER,%si
- shll $16,%esi
- movw PREVIOUS_LDT,%si
- movl RECOVER_ADDR,%edi
-
- /* Enable paging and etc. */
- movl CR0_REGISTER,%eax
- movl %eax,%cr0
-
- /* Flush the prefetch queue */
- jmp 1f
-1: jmp 1f
+ /* Re-start in case the previous BIOS call clobbers them. */
+ jmp wakeup_start
1:
/*
- * Now we are in kernel virtual memory addressing with the following
- * original register values:
- * %ebx: ds + es
- * %ecx: fs + gs
- * %edx: ss + dummy
- * %esi: LDTR + TR
- * %edi: recover address
- * We'll load these back into the segment registers now.
+ * Find relocation base and patch the gdt descript and ljmp targets
*/
- nop
+ xorl %ebx, %ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
- movl %esi,%eax /* LDTR + TR */
- lldt %ax /* load LDT register */
- shrl $16,%eax
- ltr %ax /* load task register */
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16-bit protected mode.
+ */
+ lgdtl bootgdtdesc - wakeup_start
- /* Restore segment registers */
- movl %ebx,%eax /* ds + es */
- movw %ax,%ds
- shrl $16,%eax
- movw %ax,%es
- movl %ecx,%eax /* fs + gs */
- movw %ax,%fs
- shrl $16,%eax
- movw %ax,%gs
- movl %edx,%eax /* ss */
- movw %ax,%ss
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
- /* Jump to acpi_restorecpu() */
- jmp *%edi
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and its a little clearer what we are patching.
+ */
+wakeup_sw32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_32 - wakeup_start /* offset in segment */
+ .word bootcode32 - bootgdt /* index in gdt for 32 bit code */
-/* used in real mode */
-physical_gdt: .word 0
- .long 0
-physical_esp: .long 0
-previous_cr2: .long 0
-previous_cr3: .long 0
-previous_cr4: .long 0
-resume_beep: .long 0
-reset_video: .long 0
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ */
+ ALIGN_TEXT
+ .code32
+wakeup_32:
-/*
- * Transfer from real mode to protected mode. The order of these variables
- * is very important, DO NOT INSERT OR CHANGE unless you know why.
- */
-previous_cr0: .long 0
-previous_tr: .word 0
-previous_gdt: .word 0
- .long 0
-previous_ldt: .word 0
-previous_idt: .word 0
- .long 0
-where_to_recover: .long 0
-previous_ds: .word 0
-previous_es: .word 0
-previous_fs: .word 0
-previous_gs: .word 0
-previous_ss: .word 0
-dummy: .word 0
+ mov $bootdata32 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Get PCB and return address. */
+ movl wakeup_pcb - wakeup_start(%ebx), %ecx
+ movl wakeup_ret - wakeup_start(%ebx), %edx
+
+ /* Restore CR4 and CR3. */
+ movl wakeup_cr4 - wakeup_start(%ebx), %eax
+ mov %eax, %cr4
+ movl wakeup_cr3 - wakeup_start(%ebx), %eax
+ mov %eax, %cr3
+
+ /*
+ * Finally, switch to long bit mode by enabling paging. We have
+ * to be very careful here because all the segmentation disappears
+ * out from underneath us. The spec says we can depend on the
+ * subsequent pipelined branch to execute, but *only if* everthing
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
+
+ jmp 1f
+1:
+ /* Jump to return address. */
+ jmp *%edx
+
+ .data
+
+resume_beep:
+ .byte 0
+reset_video:
+ .byte 0
+
+ ALIGN_DATA
+bootgdt:
+ .long 0x00000000
+ .long 0x00000000
+
+bootcode32:
+ .long 0x0000ffff
+ .long 0x00cf9b00
+
+bootdata32:
+ .long 0x0000ffff
+ .long 0x00cf9300
+bootgdtend:
+
+bootgdtdesc:
+ .word bootgdtend - bootgdt /* Length */
+ .long bootgdt - wakeup_start /* Offset plus %ds << 4 */
+
+ ALIGN_DATA
+wakeup_cr4:
+ .long 0
+wakeup_cr3:
+ .long 0
+wakeup_pcb:
+ .long 0
+wakeup_ret:
+ .long 0
+wakeup_gdt: /* not used */
+ .word 0
+ .long 0
+dummy:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/acpica/acpi_wakeup.c
--- a/head/sys/i386/acpica/acpi_wakeup.c Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,371 +0,0 @@
-/*-
- * Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
- * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/acpica/acpi_wakeup.c 233250 2012-03-20 21:37:52Z jkim $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/proc.h>
-#include <sys/sysctl.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-
-#include <machine/bus.h>
-#include <machine/cpufunc.h>
-#include <machine/intr_machdep.h>
-#include <x86/mca.h>
-#include <machine/segments.h>
-
-#include <contrib/dev/acpica/include/acpi.h>
-
-#include <dev/acpica/acpivar.h>
-
-#include "acpi_wakecode.h"
-#include "acpi_wakedata.h"
-
-/* Make sure the code is less than one page and leave room for the stack. */
-CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
-
-#ifndef _SYS_CDEFS_H_
-#error this file needs sys/cdefs.h as a prerequisite
-#endif
-
-extern uint32_t acpi_resume_beep;
-extern uint32_t acpi_reset_video;
-extern void initializecpu(void);
-
-static struct region_descriptor __used saved_idt, saved_gdt;
-static struct region_descriptor *p_gdt;
-static uint16_t __used saved_ldt;
-
-static uint32_t __used r_eax, r_ebx, r_ecx, r_edx, r_ebp, r_esi, r_edi,
- r_efl, r_cr0, r_cr2, r_cr3, r_cr4, ret_addr;
-
-static uint16_t __used r_cs, r_ds, r_es, r_fs, r_gs, r_ss, r_tr;
-static uint32_t __used r_esp;
-
-static void acpi_printcpu(void);
-static void acpi_realmodeinst(void *arg, bus_dma_segment_t *segs,
- int nsegs, int error);
-static void acpi_alloc_wakeup_handler(void);
-
-/* XXX shut gcc up */
-extern int acpi_savecpu(void);
-extern int acpi_restorecpu(void);
-
-#ifdef __GNUCLIKE_ASM
-__asm__(" \n\
- .text \n\
- .p2align 2, 0x90 \n\
- .type acpi_restorecpu, @function\n\
-acpi_restorecpu: \n\
- .align 4 \n\
- movl r_eax,%eax \n\
- movl r_ebx,%ebx \n\
- movl r_ecx,%ecx \n\
- movl r_edx,%edx \n\
- movl r_ebp,%ebp \n\
- movl r_esi,%esi \n\
- movl r_edi,%edi \n\
- movl r_esp,%esp \n\
- \n\
- pushl r_efl \n\
- popfl \n\
- \n\
- movl ret_addr,%eax \n\
- movl %eax,(%esp) \n\
- xorl %eax,%eax \n\
- ret \n\
- \n\
- .text \n\
- .p2align 2, 0x90 \n\
- .type acpi_savecpu, @function \n\
-acpi_savecpu: \n\
- movw %cs,r_cs \n\
- movw %ds,r_ds \n\
- movw %es,r_es \n\
- movw %fs,r_fs \n\
- movw %gs,r_gs \n\
- movw %ss,r_ss \n\
- \n\
- movl %eax,r_eax \n\
- movl %ebx,r_ebx \n\
- movl %ecx,r_ecx \n\
- movl %edx,r_edx \n\
- movl %ebp,r_ebp \n\
- movl %esi,r_esi \n\
- movl %edi,r_edi \n\
- \n\
- movl %cr0,%eax \n\
- movl %eax,r_cr0 \n\
- movl %cr2,%eax \n\
- movl %eax,r_cr2 \n\
- movl %cr3,%eax \n\
- movl %eax,r_cr3 \n\
- movl %cr4,%eax \n\
- movl %eax,r_cr4 \n\
- \n\
- pushfl \n\
- popl r_efl \n\
- \n\
- movl %esp,r_esp \n\
- \n\
- sgdt saved_gdt \n\
- sidt saved_idt \n\
- sldt saved_ldt \n\
- str r_tr \n\
- \n\
- movl (%esp),%eax \n\
- movl %eax,ret_addr \n\
- movl $1,%eax \n\
- ret \n\
-");
-#endif /* __GNUCLIKE_ASM */
-
-static void
-acpi_printcpu(void)
-{
- printf("======== acpi_printcpu() debug dump ========\n");
- printf("gdt[%04x:%08x] idt[%04x:%08x] ldt[%04x] tr[%04x] efl[%08x]\n",
- saved_gdt.rd_limit, saved_gdt.rd_base,
- saved_idt.rd_limit, saved_idt.rd_base,
- saved_ldt, r_tr, r_efl);
- printf("eax[%08x] ebx[%08x] ecx[%08x] edx[%08x]\n",
- r_eax, r_ebx, r_ecx, r_edx);
- printf("esi[%08x] edi[%08x] ebp[%08x] esp[%08x]\n",
- r_esi, r_edi, r_ebp, r_esp);
- printf("cr0[%08x] cr2[%08x] cr3[%08x] cr4[%08x]\n",
- r_cr0, r_cr2, r_cr3, r_cr4);
- printf("cs[%04x] ds[%04x] es[%04x] fs[%04x] gs[%04x] ss[%04x]\n",
- r_cs, r_ds, r_es, r_fs, r_gs, r_ss);
-}
-
-#define WAKECODE_FIXUP(offset, type, val) do { \
- type *addr; \
- addr = (type *)(sc->acpi_wakeaddr + offset); \
- *addr = val; \
-} while (0)
-
-#define WAKECODE_BCOPY(offset, type, val) do { \
- void *addr; \
- addr = (void *)(sc->acpi_wakeaddr + offset); \
- bcopy(&(val), addr, sizeof(type)); \
-} while (0)
-
-/* Turn off bits 1&2 of the PIT, stopping the beep. */
-static void
-acpi_stop_beep(void *arg)
-{
- outb(0x61, inb(0x61) & ~0x3);
-}
-
-int
-acpi_sleep_machdep(struct acpi_softc *sc, int state)
-{
- ACPI_STATUS status;
- struct pmap *pm;
- int ret;
- uint32_t cr3;
- u_long ef;
-
- ret = -1;
- if (sc->acpi_wakeaddr == 0)
- return (ret);
-
- AcpiSetFirmwareWakingVector(sc->acpi_wakephys);
-
- ef = intr_disable();
- intr_suspend();
-
- /*
- * Temporarily switch to the kernel pmap because it provides an
- * identity mapping (setup at boot) for the low physical memory
- * region containing the wakeup code.
- */
- pm = kernel_pmap;
- cr3 = rcr3();
-#ifdef PAE
- load_cr3(vtophys(pm->pm_pdpt));
-#else
- load_cr3(vtophys(pm->pm_pdir));
-#endif
-
- ret_addr = 0;
- if (acpi_savecpu()) {
- /* Execute Sleep */
-
- p_gdt = (struct region_descriptor *)
- (sc->acpi_wakeaddr + physical_gdt);
- p_gdt->rd_limit = saved_gdt.rd_limit;
- p_gdt->rd_base = vtophys(saved_gdt.rd_base);
-
- WAKECODE_FIXUP(physical_esp, uint32_t, vtophys(r_esp));
- WAKECODE_FIXUP(previous_cr0, uint32_t, r_cr0);
- WAKECODE_FIXUP(previous_cr2, uint32_t, r_cr2);
- WAKECODE_FIXUP(previous_cr3, uint32_t, r_cr3);
- WAKECODE_FIXUP(previous_cr4, uint32_t, r_cr4);
-
- WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep);
- WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video);
-
- WAKECODE_FIXUP(previous_tr, uint16_t, r_tr);
- WAKECODE_BCOPY(previous_gdt, struct region_descriptor, saved_gdt);
- WAKECODE_FIXUP(previous_ldt, uint16_t, saved_ldt);
- WAKECODE_BCOPY(previous_idt, struct region_descriptor, saved_idt);
-
- WAKECODE_FIXUP(where_to_recover, void *, acpi_restorecpu);
-
- WAKECODE_FIXUP(previous_ds, uint16_t, r_ds);
- WAKECODE_FIXUP(previous_es, uint16_t, r_es);
- WAKECODE_FIXUP(previous_fs, uint16_t, r_fs);
- WAKECODE_FIXUP(previous_gs, uint16_t, r_gs);
- WAKECODE_FIXUP(previous_ss, uint16_t, r_ss);
-
- if (bootverbose)
- acpi_printcpu();
-
- /* Call ACPICA to enter the desired sleep state */
- if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
- status = AcpiEnterSleepStateS4bios();
- else
- status = AcpiEnterSleepState(state, acpi_sleep_flags);
-
- if (status != AE_OK) {
- device_printf(sc->acpi_dev,
- "AcpiEnterSleepState failed - %s\n",
- AcpiFormatException(status));
- goto out;
- }
-
- for (;;)
- ia32_pause();
- } else {
- pmap_init_pat();
- PCPU_SET(switchtime, 0);
- PCPU_SET(switchticks, ticks);
- if (bootverbose) {
- acpi_savecpu();
- acpi_printcpu();
- }
- ret = 0;
- }
-
-out:
- load_cr3(cr3);
- mca_resume();
- intr_resume();
- intr_restore(ef);
-
- if (ret == 0 && mem_range_softc.mr_op != NULL &&
- mem_range_softc.mr_op->reinit != NULL)
- mem_range_softc.mr_op->reinit(&mem_range_softc);
-
- /* If we beeped, turn it off after a delay. */
- if (acpi_resume_beep)
- timeout(acpi_stop_beep, NULL, 3 * hz);
-
- return (ret);
-}
-
-static bus_dma_tag_t acpi_waketag;
-static bus_dmamap_t acpi_wakemap;
-static vm_offset_t acpi_wakeaddr;
-
-static void
-acpi_alloc_wakeup_handler(void)
-{
- void *wakeaddr;
-
- if (!cold)
- return;
-
- /*
- * Specify the region for our wakeup code. We want it in the low 1 MB
- * region, excluding video memory and above (0xa0000). We ask for
- * it to be page-aligned, just to be safe.
- */
- if (bus_dma_tag_create(/*parent*/ NULL,
- /*alignment*/ PAGE_SIZE, /*no boundary*/ 0,
- /*lowaddr*/ 0x9ffff, /*highaddr*/ BUS_SPACE_MAXADDR, NULL, NULL,
- /*maxsize*/ PAGE_SIZE, /*segments*/ 1, /*maxsegsize*/ PAGE_SIZE,
- 0, busdma_lock_mutex, &Giant, &acpi_waketag) != 0) {
- printf("acpi_alloc_wakeup_handler: can't create wake tag\n");
- return;
- }
- if (bus_dmamem_alloc(acpi_waketag, &wakeaddr, BUS_DMA_NOWAIT,
- &acpi_wakemap) != 0) {
- printf("acpi_alloc_wakeup_handler: can't alloc wake memory\n");
- return;
- }
- acpi_wakeaddr = (vm_offset_t)wakeaddr;
-}
-
-SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0);
-
-static void
-acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
-{
- struct acpi_softc *sc;
- uint32_t *addr;
-
- /* Overwrite the ljmp target with the real address */
- sc = arg;
- sc->acpi_wakephys = segs[0].ds_addr;
- addr = (uint32_t *)&wakecode[wakeup_sw32 + 2];
- *addr = sc->acpi_wakephys + wakeup_32;
-
- /* Copy the wake code into our low page and save its physical addr. */
- bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode));
- if (bootverbose) {
- device_printf(sc->acpi_dev, "wakeup code va %#x pa %#jx\n",
- acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys);
- }
-}
-
-void
-acpi_install_wakeup_handler(struct acpi_softc *sc)
-{
- if (acpi_wakeaddr == 0)
- return;
-
- sc->acpi_waketag = acpi_waketag;
- sc->acpi_wakeaddr = acpi_wakeaddr;
- sc->acpi_wakemap = acpi_wakemap;
-
- bus_dmamap_load(sc->acpi_waketag, sc->acpi_wakemap,
- (void *)sc->acpi_wakeaddr, PAGE_SIZE, acpi_realmodeinst, sc, 0);
-}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/conf/GENERIC
--- a/head/sys/i386/conf/GENERIC Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/conf/GENERIC Wed Jul 25 16:40:53 2012 +0300
@@ -16,7 +16,7 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/i386/conf/GENERIC 234504 2012-04-20 21:37:42Z brooks $
+# $FreeBSD: head/sys/i386/conf/GENERIC 237263 2012-06-19 07:34:13Z np $
cpu I486_CPU
cpu I586_CPU
@@ -30,6 +30,7 @@
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
+options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support
@@ -46,6 +47,7 @@
options PROCFS # Process filesystem (requires PSEUDOFS)
options PSEUDOFS # Pseudo-filesystem framework
options GEOM_PART_GPT # GUID Partition Tables.
+options GEOM_RAID # Soft RAID functionality.
options GEOM_LABEL # Provides labelization
options COMPAT_FREEBSD4 # Compatible with FreeBSD4
options COMPAT_FREEBSD5 # Compatible with FreeBSD5
@@ -66,6 +68,7 @@
options CAPABILITIES # Capsicum capabilities
options MAC # TrustedBSD MAC Framework
options KDTRACE_HOOKS # Kernel DTrace hooks
+options DDB_CTF # Kernel ELF linker loads CTF data
options INCLUDE_CONFIG_FILE # Include this file in kernel
# Debugging support. Always need this:
@@ -75,7 +78,6 @@
# For full debugger support use this instead:
options DDB # Support DDB.
options GDB # Support remote GDB.
-options DDB_CTF # kernel ELF linker loads CTF data
options DEADLKRES # Enable the deadlock resolver
options INVARIANTS # Enable calls of extra sanity checking
options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
@@ -284,6 +286,8 @@
device ath_pci # Atheros pci/cardbus glue
device ath_hal # pci/cardbus chip support
options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors
+options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation
+options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later
device ath_rate_sample # SampleRate tx rate control for ath
#device bwi # Broadcom BCM430x/BCM431x wireless NICs.
#device bwn # Broadcom BCM43xx wireless NICs.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/conf/XEN
--- a/head/sys/i386/conf/XEN Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/conf/XEN Wed Jul 25 16:40:53 2012 +0300
@@ -1,13 +1,13 @@
#
# XEN -- Kernel configuration for i386 XEN DomU
#
-# $FreeBSD: head/sys/i386/conf/XEN 233271 2012-03-21 08:38:42Z ed $
+# $FreeBSD: head/sys/i386/conf/XEN 237263 2012-06-19 07:34:13Z np $
cpu I686_CPU
ident XEN
makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-makeoptions WITHOUT_MODULES="aha ahb amd cxgb dpt drm hptmv ida malo mps mwl nve sound sym trm xfs"
+makeoptions WITHOUT_MODULES="aha ahb amd cxgb dpt drm drm2 hptmv ida malo mps mwl nve rdma sound sym trm xfs"
options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/apic_vector.s
--- a/head/sys/i386/i386/apic_vector.s Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/apic_vector.s Wed Jul 25 16:40:53 2012 +0300
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/i386/apic_vector.s 235683 2012-05-20 08:17:20Z iwasaki $
*/
/*
@@ -334,6 +334,26 @@
iret
/*
+ * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */
+#ifndef XEN
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cpususpend)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+
+ movl lapic, %eax
+ movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+
+ call cpususpend_handler
+
+ POP_FRAME
+ jmp doreti_iret
+#endif
+
+/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
*
* - Calls the generic rendezvous action function.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/bios.c
--- a/head/sys/i386/i386/bios.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/bios.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/i386/i386/bios.c 236213 2012-05-29 01:48:06Z kevlo $");
/*
* Code for dealing with the BIOS in x86 PC systems.
@@ -372,9 +372,11 @@
break;
default:
+ va_end(ap);
return (EINVAL);
}
}
+ va_end(ap);
if (flags & BIOSARGS_FLAG) {
if (arg_end - arg_start > ctob(16))
@@ -448,9 +450,11 @@
break;
default:
+ va_end(ap);
return (EINVAL);
}
}
+ va_end(ap);
set_bios_selectors(&args->seg, flags);
bioscall_vector.vec16.offset = (u_short)args->entry;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/elf_machdep.c
--- a/head/sys/i386/i386/elf_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/elf_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/i386/i386/elf_machdep.c 237435 2012-06-22 07:16:29Z kib $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -74,12 +74,15 @@
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
- .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32,
+ .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP,
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
+ .sv_shared_page_base = SHAREDPAGE,
+ .sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = NULL,
};
+INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
static Elf32_Brandinfo freebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/genassym.c
--- a/head/sys/i386/i386/genassym.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/genassym.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/genassym.c 224187 2011-07-18 15:19:40Z attilio $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/genassym.c 235622 2012-05-18 18:55:58Z iwasaki $");
#include "opt_apic.h"
#include "opt_compat.h"
@@ -121,7 +121,10 @@
ASSYM(KERNBASE, KERNBASE);
ASSYM(KERNLOAD, KERNLOAD);
ASSYM(MCLBYTES, MCLBYTES);
+ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0));
+ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2));
ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
+ASSYM(PCB_CR4, offsetof(struct pcb, pcb_cr4));
ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi));
ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi));
ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp));
@@ -130,7 +133,11 @@
ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip));
ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0));
+ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds));
+ASSYM(PCB_ES, offsetof(struct pcb, pcb_es));
+ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs));
ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
+ASSYM(PCB_SS, offsetof(struct pcb, pcb_ss));
ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
@@ -143,6 +150,7 @@
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_FSD, offsetof(struct pcb, pcb_fsd));
+ASSYM(PCB_GSD, offsetof(struct pcb, pcb_gsd));
ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
@@ -152,6 +160,11 @@
ASSYM(PCB_SIZE, sizeof(struct pcb));
ASSYM(PCB_VM86CALL, PCB_VM86CALL);
+ASSYM(PCB_GDT, offsetof(struct pcb, pcb_gdt));
+ASSYM(PCB_IDT, offsetof(struct pcb, pcb_idt));
+ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt));
+ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr));
+
ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip));
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/initcpu.c
--- a/head/sys/i386/i386/initcpu.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/initcpu.c Wed Jul 25 16:40:53 2012 +0300
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/initcpu.c 230767 2012-01-30 07:56:00Z kib $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/initcpu.c 235622 2012-05-18 18:55:58Z iwasaki $");
#include "opt_cpu.h"
@@ -48,7 +48,6 @@
#define CPU_ENABLE_SSE
#endif
-void initializecpu(void);
#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
void enable_K5_wt_alloc(void);
void enable_K6_wt_alloc(void);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/machdep.c
--- a/head/sys/i386/i386/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/machdep.c 234105 2012-04-10 16:08:46Z marius $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/machdep.c 238310 2012-07-09 20:42:08Z jhb $");
#include "opt_apic.h"
#include "opt_atalk.h"
@@ -75,6 +75,7 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/memrange.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
@@ -180,7 +181,6 @@
extern void printcpuinfo(void); /* XXX header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
-extern void initializecpu(void);
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
@@ -248,6 +248,8 @@
struct mtx icu_lock;
+struct mem_range_softc mem_range_softc;
+
static void
cpu_startup(dummy)
void *dummy;
@@ -337,12 +339,10 @@
cpu_setregs();
#endif
-#ifdef SMP
/*
* Add BSP as an interrupt target.
*/
intr_add_cpu(0);
-#endif
}
/*
@@ -472,7 +472,13 @@
}
regs->tf_esp = (int)fp;
- regs->tf_eip = PS_STRINGS - szosigcode;
+ if (p->p_sysent->sv_sigcode_base != 0) {
+ regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
+ szosigcode;
+ } else {
+ /* a.out sysentvec does not use shared page */
+ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
+ }
regs->tf_eflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@@ -599,7 +605,8 @@
}
regs->tf_esp = (int)sfp;
- regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
+ regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
+ szfreebsd4_sigcode;
regs->tf_eflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@@ -750,7 +757,7 @@
}
regs->tf_esp = (int)sfp;
- regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
+ regs->tf_eip = p->p_sysent->sv_sigcode_base;
regs->tf_eflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@@ -2178,7 +2185,7 @@
pt_entry_t *pte;
quad_t dcons_addr, dcons_size;
#ifndef XEN
- int hasbrokenint12, i;
+ int hasbrokenint12, i, res;
u_int extmem;
struct vm86frame vmf;
struct vm86context vmc;
@@ -2263,7 +2270,8 @@
pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
vmc.npages = 0;
smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
- vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+ res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+ KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
vmf.vmf_ebx = 0;
do {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/mem.c
--- a/head/sys/i386/i386/mem.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/mem.c Wed Jul 25 16:40:53 2012 +0300
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/i386/i386/mem.c 238310 2012-07-09 20:42:08Z jhb $");
/*
* Memory special file
@@ -72,8 +72,6 @@
*/
MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
-struct mem_range_softc mem_range_softc;
-
static struct sx memsxlock;
SX_SYSINIT(memsxlockinit, &memsxlock, "/dev/mem lock");
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/minidump_machdep.c
--- a/head/sys/i386/i386/minidump_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/minidump_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/minidump_machdep.c 221173 2011-04-28 16:02:05Z attilio $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/minidump_machdep.c 236503 2012-06-03 08:01:12Z avg $");
#include "opt_watchdog.h"
@@ -36,9 +36,7 @@
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
-#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
-#endif
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
@@ -143,9 +141,9 @@
printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
counter &= (1<<24) - 1;
}
-#ifdef SW_WATCHDOG
+
wdog_kern_pat(WD_LASTVAL);
-#endif
+
if (ptr) {
error = dump_write(di, ptr, 0, dumplo, len);
if (error)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/mp_machdep.c
--- a/head/sys/i386/i386/mp_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/mp_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 234208 2012-04-13 07:18:19Z avg $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 236938 2012-06-12 00:14:54Z iwasaki $");
#include "opt_apic.h"
#include "opt_cpu.h"
@@ -146,6 +146,7 @@
static void *dpcpu;
struct pcb stoppcbs[MAXCPU];
+struct pcb **susppcbs = NULL;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
@@ -587,6 +588,9 @@
setidt(IPI_STOP, IDTVEC(cpustop),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ /* Install an inter-CPU IPI for CPU suspend/resume */
+ setidt(IPI_SUSPEND, IDTVEC(cpususpend),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Set boot_cpu_id if needed. */
if (boot_cpu_id == -1) {
@@ -1077,6 +1081,60 @@
/* used as a watchpoint to signal AP startup */
cpus = mp_naps;
+ ipi_startup(apic_id, vector);
+
+ /* Wait up to 5 seconds for it to start. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (mp_naps > cpus)
+ return 1; /* return SUCCESS */
+ DELAY(1000);
+ }
+ return 0; /* return FAILURE */
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
/*
* first we do an INIT/RESET IPI this INIT IPI might be run, reseting
* and running the target CPU. OR this INIT IPI might be latched (P5
@@ -1127,52 +1185,8 @@
vector, apic_id);
lapic_ipi_wait(-1);
DELAY(200); /* wait ~200uS */
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return 1; /* return SUCCESS */
- DELAY(1000);
- }
- return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
- 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
/*
* Send an IPI to specified CPU handling the bitmap logic.
*/
@@ -1498,6 +1512,39 @@
}
/*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+ u_int cpu;
+
+ cpu = PCPU_GET(cpuid);
+
+ if (savectx(susppcbs[cpu])) {
+ wbinvd();
+ CPU_SET_ATOMIC(cpu, &suspended_cpus);
+ } else {
+ pmap_init_pat();
+ PCPU_SET(switchtime, 0);
+ PCPU_SET(switchticks, ticks);
+
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+ }
+
+ /* Wait for resume */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ ia32_pause();
+
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+
+ /* Resume MCA and local APIC */
+ mca_resume();
+ lapic_setup(0);
+}
+/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/pmap.c
--- a/head/sys/i386/i386/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -75,7 +75,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 233433 2012-03-24 19:43:49Z alc $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 237623 2012-06-27 03:45:25Z alc $");
/*
* Manages physical address maps.
@@ -118,6 +118,7 @@
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
@@ -231,8 +232,20 @@
static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
/*
+ * Isolate the global pv list lock from data and other locks to prevent false
+ * sharing within the cache.
+ */
+static struct {
+ struct rwlock lock;
+ char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
+} pvh_global __aligned(CACHE_LINE_SIZE);
+
+#define pvh_global_lock pvh_global.lock
+
+/*
* Data for the pv entry allocation mechanism
*/
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;
@@ -283,8 +296,9 @@
"Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;
+static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
@@ -391,6 +405,12 @@
kernel_pmap->pm_root = NULL;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
+
LIST_INIT(&allpmaps);
/*
@@ -1275,7 +1295,7 @@
* scans are across different pmaps. It is very wasteful
* to do an entire invltlb for checking a single mapping.
*
- * If the given pmap is not the current pmap, vm_page_queue_mtx
+ * If the given pmap is not the current pmap, pvh_global_lock
* must be held and curthread pinned to a CPU.
*/
static pt_entry_t *
@@ -1291,7 +1311,7 @@
/* are we current address space or kernel? */
if (pmap_is_current(pmap))
return (vtopte(va));
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
newpf = *pde & PG_FRAME;
if ((*PMAP1 & PG_FRAME) != newpf) {
@@ -1840,9 +1860,9 @@
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
if (flags & M_WAITOK) {
PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
VM_WAIT;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
@@ -2143,6 +2163,7 @@
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
+CTASSERT(_NPCPV == 336);
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
@@ -2156,7 +2177,7 @@
#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
#define PC_FREE10 0x0000fffful /* Free values for index 10 */
-static uint32_t pc_freemask[11] = {
+static const uint32_t pc_freemask[_NPCM] = {
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
@@ -2187,83 +2208,155 @@
"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
- "Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
- "Current number times pmap_collect called on active queue");
#endif
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
- * another pv entry chunk. This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
*/
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
{
+ struct pch newtail;
+ struct pv_chunk *pc;
+ struct md_page *pvh;
pd_entry_t *pde;
pmap_t pmap;
pt_entry_t *pte, tpte;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
vm_offset_t va;
- vm_page_t m, free;
-
+ vm_page_t free, m, m_pc;
+ uint32_t inuse;
+ int bit, field, freed;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ pmap = NULL;
+ free = m_pc = NULL;
+ TAILQ_INIT(&newtail);
sched_pin();
- TAILQ_FOREACH(m, &vpq->pl, pageq) {
- if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
- continue;
- TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
+ free == NULL)) {
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ if (pmap != pc->pc_pmap) {
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap)
PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+ else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+ pmap = NULL;
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
continue;
- pmap->pm_stats.resident_count--;
- pde = pmap_pde(pmap, va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
- " a 4mpage in page %p's pv list", m));
- pte = pmap_pte_quick(pmap, va);
- tpte = pte_load_clear(pte);
- KASSERT((tpte & PG_W) == 0,
- ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- free = NULL;
- pmap_unuse_pt(pmap, va, &free);
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- free_pv_entry(pmap, pv);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
+ }
}
- if (TAILQ_EMPTY(&m->md.pv_list) &&
- TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
+
+ /*
+ * Destroy every non-wired, 4 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = bsfl(inuse);
+ pv = &pc->pc_pventry[field * 32 + bit];
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_PS) != 0)
+ continue;
+ pte = pmap_pte_quick(pmap, va);
+ if ((*pte & PG_W) != 0)
+ continue;
+ tpte = pte_load_clear(pte);
+ if ((tpte & PG_G) != 0)
+ pmap_invalidate_page(pmap, va);
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if ((tpte & PG_A) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ vm_page_aflag_clear(m,
+ PGA_WRITEABLE);
+ }
+ }
+ pc->pc_map[field] |= 1UL << bit;
+ pmap_unuse_pt(pmap, va, &free);
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+ continue;
+ }
+ /* Every freed mapping is for a 4 KB page. */
+ pmap->pm_stats.resident_count -= freed;
+ PV_STAT(pv_entry_frees += freed);
+ PV_STAT(pv_entry_spare += freed);
+ pv_entry_count -= freed;
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ for (field = 0; field < _NPCM; field++)
+ if (pc->pc_map[field] != pc_freemask[field]) {
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+ /*
+ * One freed pv entry in locked_pmap is
+ * sufficient.
+ */
+ if (pmap == locked_pmap)
+ goto out;
+ break;
+ }
+ if (field == _NPCM) {
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+ pmap_qremove((vm_offset_t)pc, 1);
+ pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ break;
+ }
}
+out:
sched_unpin();
+ TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
+ m_pc = free;
+ free = m_pc->right;
+ /* Recycle a freed page table page. */
+ m_pc->wire_count = 1;
+ atomic_add_int(&cnt.v_wire_count, 1);
+ }
+ pmap_free_zero_pages(free);
+ return (m_pc);
}
-
/*
* free the pv_entry back to the free list
*/
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
- vm_page_t m;
struct pv_chunk *pc;
int idx, field, bit;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(pv_entry_frees++);
PV_STAT(pv_entry_spare++);
@@ -2273,13 +2366,30 @@
field = idx / 32;
bit = idx % 32;
pc->pc_map[field] |= 1ul << bit;
- /* move to head of list */
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
for (idx = 0; idx < _NPCM; idx++)
if (pc->pc_map[idx] != pc_freemask[idx]) {
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ /*
+ * 98% of the time, pc is already at the head of the
+ * list. If it isn't already, move it to the head.
+ */
+ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
+ pc)) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ }
return;
}
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
@@ -2296,18 +2406,17 @@
* when needed.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, int try)
+get_pv_entry(pmap_t pmap, boolean_t try)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
- struct vpgqueues *pq;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
vm_page_t m;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PV_STAT(pv_entry_allocs++);
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
@@ -2315,7 +2424,6 @@
printf("Approaching the limit on PV entries, consider "
"increasing either the vm.pmap.shpgperproc or the "
"vm.pmap.pv_entry_max tunable.\n");
- pq = NULL;
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
@@ -2341,33 +2449,20 @@
}
}
/*
- * Access to the ptelist "pv_vafree" is synchronized by the page
- * queues lock. If "pv_vafree" is currently non-empty, it will
+ * Access to the ptelist "pv_vafree" is synchronized by the pvh
+ * global lock. If "pv_vafree" is currently non-empty, it will
* remain non-empty until pmap_ptelist_alloc() completes.
*/
- if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
- &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
+ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
if (try) {
pv_entry_count--;
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
- /*
- * Reclaim pv entries: At first, destroy mappings to
- * inactive pages. After that, if a pv chunk entry
- * is still needed, destroy mappings to active pages.
- */
- if (pq == NULL) {
- PV_STAT(pmap_collect_inactive++);
- pq = &vm_page_queues[PQ_INACTIVE];
- } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
- PV_STAT(pmap_collect_active++);
- pq = &vm_page_queues[PQ_ACTIVE];
- } else
- panic("get_pv_entry: increase vm.pmap.shpgperproc");
- pmap_collect(pmap, pq);
- goto retry;
+ m = pmap_pv_reclaim(pmap);
+ if (m == NULL)
+ goto retry;
}
PV_STAT(pc_chunk_count++);
PV_STAT(pc_chunk_allocs++);
@@ -2377,6 +2472,7 @@
pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
for (field = 1; field < _NPCM; field++)
pc->pc_map[field] = pc_freemask[field];
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(pv_entry_spare += _NPCPV - 1);
@@ -2388,7 +2484,7 @@
{
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
@@ -2406,7 +2502,7 @@
vm_offset_t va_last;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 4mpage aligned"));
@@ -2439,7 +2535,7 @@
vm_offset_t va_last;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 4mpage aligned"));
@@ -2480,7 +2576,7 @@
{
struct md_page *pvh;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
pmap_pvh_free(&m->md, pmap, va);
if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2498,8 +2594,8 @@
{
pv_entry_t pv;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
pv = get_pv_entry(pmap, FALSE);
pv->pv_va = va;
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
@@ -2513,8 +2609,8 @@
{
pv_entry_t pv;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if (pv_entry_count < pv_entry_high_water &&
(pv = get_pv_entry(pmap, TRUE)) != NULL) {
pv->pv_va = va;
@@ -2533,7 +2629,7 @@
struct md_page *pvh;
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
if (pv_entry_count < pv_entry_high_water &&
(pv = get_pv_entry(pmap, TRUE)) != NULL) {
pv->pv_va = va;
@@ -2611,7 +2707,7 @@
*/
if (va >= KERNBASE)
firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
- else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
+ else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
if ((*PMAP1 & PG_FRAME) != mptepa) {
*PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
#ifdef SMP
@@ -2770,7 +2866,7 @@
pt_entry_t oldpte;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpte = pte_load_clear(ptq);
if (oldpte & PG_W)
@@ -2801,7 +2897,7 @@
{
pt_entry_t *pte;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
@@ -2833,7 +2929,7 @@
anyvalid = 0;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
PMAP_LOCK(pmap);
@@ -2922,7 +3018,7 @@
sched_unpin();
if (anyvalid)
pmap_invalidate_all(pmap);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(free);
}
@@ -2954,7 +3050,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
free = NULL;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
@@ -2995,7 +3091,7 @@
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_free_zero_pages(free);
}
@@ -3050,7 +3146,7 @@
vm_offset_t pdnxt;
pd_entry_t ptpaddr;
pt_entry_t *pte;
- int anychanged;
+ boolean_t anychanged, pv_lists_locked;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
@@ -3066,10 +3162,16 @@
return;
#endif
- anychanged = 0;
-
- vm_page_lock_queues();
- sched_pin();
+ if (pmap_is_current(pmap))
+ pv_lists_locked = FALSE;
+ else {
+ pv_lists_locked = TRUE;
+resume:
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ }
+ anychanged = FALSE;
+
PMAP_LOCK(pmap);
for (; sva < eva; sva = pdnxt) {
pt_entry_t obits, pbits;
@@ -3104,12 +3206,27 @@
*/
if (pmap_protect_pde(pmap,
&pmap->pm_pdir[pdirindex], sva, prot))
- anychanged = 1;
+ anychanged = TRUE;
continue;
- } else if (!pmap_demote_pde(pmap,
- &pmap->pm_pdir[pdirindex], sva)) {
- /* The large page mapping was destroyed. */
- continue;
+ } else {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_wlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(
+ pmap);
+ PMAP_UNLOCK(pmap);
+ goto resume;
+ }
+ }
+ if (!pmap_demote_pde(pmap,
+ &pmap->pm_pdir[pdirindex], sva)) {
+ /*
+ * The large page mapping was
+ * destroyed.
+ */
+ continue;
+ }
}
}
@@ -3155,14 +3272,16 @@
if (obits & PG_G)
pmap_invalidate_page(pmap, sva);
else
- anychanged = 1;
+ anychanged = TRUE;
}
}
}
- sched_unpin();
if (anychanged)
pmap_invalidate_all(pmap);
- vm_page_unlock_queues();
+ if (pv_lists_locked) {
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ }
PMAP_UNLOCK(pmap);
}
@@ -3332,7 +3451,7 @@
mpte = NULL;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
sched_pin();
@@ -3502,7 +3621,7 @@
pmap_promote_pde(pmap, pde, va);
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3517,7 +3636,7 @@
{
pd_entry_t *pde, newpde;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
pde = pmap_pde(pmap, va);
if (*pde != 0) {
@@ -3586,7 +3705,7 @@
psize = atop(end - start);
mpte = NULL;
m = m_start;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
@@ -3600,7 +3719,7 @@
mpte);
m = TAILQ_NEXT(m, listq);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3617,10 +3736,10 @@
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3635,7 +3754,7 @@
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -3841,9 +3960,9 @@
if (!wired != ((*pde & PG_W) == 0)) {
if (!are_queues_locked) {
are_queues_locked = TRUE;
- if (!mtx_trylock(&vm_page_queue_mtx)) {
+ if (!rw_try_wlock(&pvh_global_lock)) {
PMAP_UNLOCK(pmap);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
goto retry;
}
}
@@ -3867,7 +3986,7 @@
pmap_pte_release(pte);
out:
if (are_queues_locked)
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -3896,7 +4015,7 @@
if (!pmap_is_current(src_pmap))
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -3986,7 +4105,7 @@
}
out:
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -4128,7 +4247,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
@@ -4150,7 +4269,7 @@
break;
}
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4168,13 +4287,13 @@
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
count = pmap_pvh_wired_mappings(&m->md, count);
if ((m->flags & PG_FICTITIOUS) == 0) {
count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
count);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (count);
}
@@ -4190,7 +4309,7 @@
pt_entry_t *pte;
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
sched_pin();
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
pmap = PV_PMAP(pv);
@@ -4215,11 +4334,11 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4249,13 +4368,13 @@
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
sched_pin();
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
for (field = 0; field < _NPCM; field++) {
- inuse = (~(pc->pc_map[field])) & pc_freemask[field];
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
bit = bsfl(inuse);
bitmask = 1UL << bit;
@@ -4347,20 +4466,13 @@
}
}
if (allfree) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- vm_page_unwire(m, 0);
- vm_page_free(m);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ free_pv_chunk(pc);
}
}
sched_unpin();
pmap_invalidate_all(pmap);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(free);
}
@@ -4388,11 +4500,11 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
((m->flags & PG_FICTITIOUS) == 0 &&
pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4409,7 +4521,7 @@
pmap_t pmap;
boolean_t rv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
sched_pin();
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
@@ -4462,11 +4574,11 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
rv = pmap_is_referenced_pvh(&m->md) ||
((m->flags & PG_FICTITIOUS) == 0 &&
pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -4482,7 +4594,7 @@
pmap_t pmap;
boolean_t rv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
sched_pin();
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
@@ -4523,7 +4635,7 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
@@ -4564,7 +4676,7 @@
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -4593,7 +4705,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
@@ -4652,7 +4764,7 @@
}
out:
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rtval);
}
@@ -4682,7 +4794,7 @@
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
@@ -4743,7 +4855,7 @@
PMAP_UNLOCK(pmap);
}
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -4763,7 +4875,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_reference: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
@@ -4810,7 +4922,7 @@
PMAP_UNLOCK(pmap);
}
sched_unpin();
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/ptrace_machdep.c
--- a/head/sys/i386/i386/ptrace_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/ptrace_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/i386/i386/ptrace_machdep.c 238675 2012-07-21 21:39:02Z kib $");
#include "opt_cpu.h"
@@ -54,10 +54,12 @@
fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
switch (req) {
case PT_GETXMMREGS:
+ npxgetregs(td);
error = copyout(fpstate, addr, sizeof(*fpstate));
break;
case PT_SETXMMREGS:
+ npxgetregs(td);
error = copyin(addr, fpstate, sizeof(*fpstate));
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/swtch.s
--- a/head/sys/i386/i386/swtch.s Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/swtch.s Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/i386/swtch.s 237027 2012-06-13 21:03:01Z jkim $
*/
#include "opt_npx.h"
@@ -386,6 +386,36 @@
pushfl
popl PCB_PSL(%ecx)
+ movl %cr0,%eax
+ movl %eax,PCB_CR0(%ecx)
+ movl %cr2,%eax
+ movl %eax,PCB_CR2(%ecx)
+ movl %cr4,%eax
+ movl %eax,PCB_CR4(%ecx)
+
+ movl %dr0,%eax
+ movl %eax,PCB_DR0(%ecx)
+ movl %dr1,%eax
+ movl %eax,PCB_DR1(%ecx)
+ movl %dr2,%eax
+ movl %eax,PCB_DR2(%ecx)
+ movl %dr3,%eax
+ movl %eax,PCB_DR3(%ecx)
+ movl %dr6,%eax
+ movl %eax,PCB_DR6(%ecx)
+ movl %dr7,%eax
+ movl %eax,PCB_DR7(%ecx)
+
+ mov %ds,PCB_DS(%ecx)
+ mov %es,PCB_ES(%ecx)
+ mov %fs,PCB_FS(%ecx)
+ mov %ss,PCB_SS(%ecx)
+
+ sgdt PCB_GDT(%ecx)
+ sidt PCB_IDT(%ecx)
+ sldt PCB_LDT(%ecx)
+ str PCB_TR(%ecx)
+
#ifdef DEV_NPX
/*
* If fpcurthread == NULL, then the npx h/w state is irrelevant and the
@@ -425,5 +455,84 @@
popfl
#endif /* DEV_NPX */
+ movl $1,%eax
ret
END(savectx)
+
+/*
+ * resumectx(pcb) __fastcall
+ * Resuming processor state from pcb.
+ */
+ENTRY(resumectx)
+ /* Restore GDT. */
+ lgdt PCB_GDT(%ecx)
+
+ /* Restore segment registers */
+ movzwl PCB_DS(%ecx),%eax
+ mov %ax,%ds
+ movzwl PCB_ES(%ecx),%eax
+ mov %ax,%es
+ movzwl PCB_FS(%ecx),%eax
+ mov %ax,%fs
+ movzwl PCB_GS(%ecx),%eax
+ movw %ax,%gs
+ movzwl PCB_SS(%ecx),%eax
+ mov %ax,%ss
+
+ /* Restore CR2, CR4, CR3 and CR0 */
+ movl PCB_CR2(%ecx),%eax
+ movl %eax,%cr2
+ movl PCB_CR4(%ecx),%eax
+ movl %eax,%cr4
+ movl PCB_CR3(%ecx),%eax
+ movl %eax,%cr3
+ movl PCB_CR0(%ecx),%eax
+ movl %eax,%cr0
+ jmp 1f
+1:
+
+ /* Restore descriptor tables */
+ lidt PCB_IDT(%ecx)
+ lldt PCB_LDT(%ecx)
+
+#define SDT_SYS386TSS 9
+#define SDT_SYS386BSY 11
+ /* Clear "task busy" bit and reload TR */
+ movl PCPU(TSS_GDT),%eax
+ andb $(~SDT_SYS386BSY | SDT_SYS386TSS),5(%eax)
+ movzwl PCB_TR(%ecx),%eax
+ ltr %ax
+#undef SDT_SYS386TSS
+#undef SDT_SYS386BSY
+
+ /* Restore debug registers */
+ movl PCB_DR0(%ecx),%eax
+ movl %eax,%dr0
+ movl PCB_DR1(%ecx),%eax
+ movl %eax,%dr1
+ movl PCB_DR2(%ecx),%eax
+ movl %eax,%dr2
+ movl PCB_DR3(%ecx),%eax
+ movl %eax,%dr3
+ movl PCB_DR6(%ecx),%eax
+ movl %eax,%dr6
+ movl PCB_DR7(%ecx),%eax
+ movl %eax,%dr7
+
+#ifdef DEV_NPX
+ /* XXX FIX ME */
+#endif
+
+ /* Restore other registers */
+ movl PCB_EDI(%ecx),%edi
+ movl PCB_ESI(%ecx),%esi
+ movl PCB_EBP(%ecx),%ebp
+ movl PCB_ESP(%ecx),%esp
+ movl PCB_EBX(%ecx),%ebx
+
+ /* reload code selector by turning return into intersegmental return */
+ pushl PCB_EIP(%ecx)
+ movl $KCSEL,4(%esp)
+ xorl %eax,%eax
+ lret
+END(resumectx)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/trap.c
--- a/head/sys/i386/i386/trap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/trap.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/trap.c 233781 2012-04-02 15:07:22Z jhb $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/trap.c 238678 2012-07-21 21:52:48Z kib $");
/*
* 386 Trap and System call handling
@@ -369,7 +369,7 @@
case T_ARITHTRAP: /* arithmetic trap */
#ifdef DEV_NPX
- ucode = npxtrap();
+ ucode = npxtrap_x87();
if (ucode == -1)
goto userout;
#else
@@ -532,7 +532,13 @@
break;
case T_XMMFLT: /* SIMD floating-point exception */
- ucode = 0; /* XXX */
+#if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+ ucode = npxtrap_sse();
+ if (ucode == -1)
+ goto userout;
+#else
+ ucode = 0;
+#endif
i = SIGFPE;
break;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/i386/vm86.c
--- a/head/sys/i386/i386/vm86.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/i386/vm86.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/vm86.c 234350 2012-04-16 19:31:44Z jkim $");
+__FBSDID("$FreeBSD: head/sys/i386/i386/vm86.c 237924 2012-07-01 12:59:00Z brueffer $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -650,7 +650,6 @@
return (1);
}
return (0);
- panic("vm86_getptr: address not found");
}
int
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/apicvar.h
--- a/head/sys/i386/include/apicvar.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/apicvar.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/i386/include/apicvar.h 232230 2012-02-27 17:30:21Z jhb $
+ * $FreeBSD: head/sys/i386/include/apicvar.h 235622 2012-05-18 18:55:58Z iwasaki $
*/
#ifndef _MACHINE_APICVAR_H_
@@ -126,7 +126,8 @@
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
-#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */
+#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */
+#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */
/*
* The spurious interrupt can share the priority class with the IPIs since
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/atomic.h
--- a/head/sys/i386/include/atomic.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/atomic.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/i386/include/atomic.h 220404 2011-04-06 23:59:59Z jkim $
+ * $FreeBSD: head/sys/i386/include/atomic.h 236456 2012-06-02 18:10:16Z kib $
*/
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_
@@ -32,9 +32,9 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#define mb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define wmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define rmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
+#define mb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define wmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define rmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
/*
* Various simple operations on memory, each of which is atomic in the
@@ -79,8 +79,9 @@
int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
-u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \
+#define ATOMIC_LOAD(TYPE, LOP) \
+u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
#else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -280,16 +281,29 @@
return (v);
}
+/*
+ * We assume that a = b will do atomic loads and stores. Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels. We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence. For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+ __asm __volatile("" : : : "memory"); \
+ *p = v; \
+} \
+struct __hack
+
#if defined(_KERNEL) && !defined(SMP)
-/*
- * We assume that a = b will do atomic loads and stores. However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels. For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
- */
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
+#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
@@ -299,18 +313,11 @@
__asm __volatile("" : : : "memory"); \
return (tmp); \
} \
- \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __asm __volatile("" : : : "memory"); \
- *p = v; \
-} \
struct __hack
#else /* !(_KERNEL && !SMP) */
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
+#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
@@ -324,19 +331,6 @@
\
return (res); \
} \
- \
-/* \
- * The XCHG instruction asserts LOCK automagically. \
- */ \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __asm __volatile(SOP \
- : "=m" (*p), /* 0 */ \
- "+r" (v) /* 1 */ \
- : "m" (*p) /* 2 */ \
- : "memory"); \
-} \
struct __hack
#endif /* _KERNEL && !SMP */
@@ -363,13 +357,19 @@
ATOMIC_ASM(add, long, "addl %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v);
-ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long, "cmpxchgl %0,%1", "xchgl %1,%0");
+ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int, "cmpxchgl %0,%1");
+ATOMIC_LOAD(long, "cmpxchgl %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
#undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
#ifndef WANT_FUNCTIONS
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/bootinfo.h
--- a/head/sys/i386/include/bootinfo.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/bootinfo.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/include/bootinfo.h 235391 2012-05-13 09:25:39Z avg $
*/
#ifndef _MACHINE_BOOTINFO_H_
@@ -65,13 +65,13 @@
u_int32_t bi_kernend; /* end of kernel space */
u_int32_t bi_envp; /* environment */
u_int32_t bi_modulep; /* preloaded modules */
+ uint32_t bi_memdesc_version; /* EFI memory desc version */
+ uint64_t bi_memdesc_size; /* sizeof EFI memory desc */
+ uint64_t bi_memmap; /* pa of EFI memory map */
+ uint64_t bi_memmap_size; /* size of EFI memory map */
uint64_t bi_hcdp; /* DIG64 HCDP table */
uint64_t bi_fpswa; /* FPSWA interface */
uint64_t bi_systab; /* pa of EFI system table */
- uint64_t bi_memmap; /* pa of EFI memory map */
- uint64_t bi_memmap_size; /* size of EFI memory map */
- uint64_t bi_memdesc_size; /* sizeof EFI memory desc */
- uint32_t bi_memdesc_version; /* EFI memory desc version */
};
#ifdef _KERNEL
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/cpufunc.h
--- a/head/sys/i386/include/cpufunc.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/cpufunc.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/i386/include/cpufunc.h 223796 2011-07-05 18:42:10Z jkim $
+ * $FreeBSD: head/sys/i386/include/cpufunc.h 238311 2012-07-09 20:55:39Z jhb $
*/
/*
@@ -97,6 +97,13 @@
}
static __inline void
+clts(void)
+{
+
+ __asm __volatile("clts");
+}
+
+static __inline void
disable_intr(void)
{
#ifdef XEN
@@ -688,6 +695,9 @@
int breakpoint(void);
u_int bsfl(u_int mask);
u_int bsrl(u_int mask);
+void clflush(u_long addr);
+void clts(void);
+void cpuid_count(u_int ax, u_int cx, u_int *p);
void disable_intr(void);
void do_cpuid(u_int ax, u_int *p);
void enable_intr(void);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/elf.h
--- a/head/sys/i386/include/elf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/elf.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/include/elf.h 237430 2012-06-22 06:38:31Z kib $
*/
#ifndef _MACHINE_ELF_H_
@@ -96,6 +96,7 @@
#define AT_NCPUS 19 /* Number of CPUs. */
#define AT_PAGESIZES 20 /* Pagesizes. */
#define AT_PAGESIZESLEN 21 /* Number of pagesizes. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
#define AT_STACKPROT 23 /* Initial stack protection. */
#define AT_COUNT 24 /* Count of defined aux entry types. */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/in_cksum.h
--- a/head/sys/i386/include/in_cksum.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/in_cksum.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* from tahoe: in_cksum.c 1.2 86/01/05
* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
* from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/include/in_cksum.h 235941 2012-05-24 22:00:48Z bz $
*/
#ifndef _MACHINE_IN_CKSUM_H_
@@ -54,6 +54,7 @@
* therefore always exactly five 32-bit words.
*/
#if defined(__GNUCLIKE_ASM) && !defined(__INTEL_COMPILER)
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline u_int
in_cksum_hdr(const struct ip *ip)
{
@@ -88,6 +89,7 @@
__tmpsum = (int)ntohs(ip->ip_sum) + 256;
ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16));
}
+#endif
static __inline u_short
in_addword(u_short sum, u_short b)
@@ -121,6 +123,7 @@
}
#else
+#if defined(IPVERSION) && (IPVERSION == 4)
#define in_cksum_update(ip) \
do { \
int __tmpsum; \
@@ -129,10 +132,13 @@
} while(0)
#endif
+#endif
#ifdef _KERNEL
#if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER)
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/intr_machdep.h
--- a/head/sys/i386/include/intr_machdep.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/intr_machdep.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/i386/include/intr_machdep.h 234207 2012-04-13 07:15:40Z avg $
+ * $FreeBSD: head/sys/i386/include/intr_machdep.h 234989 2012-05-03 21:44:01Z attilio $
*/
#ifndef __MACHINE_INTR_MACHDEP_H__
@@ -131,9 +131,7 @@
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
-#ifdef SMP
void intr_add_cpu(u_int cpu);
-#endif
int intr_add_handler(const char *name, int vector, driver_filter_t filter,
driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
#ifdef SMP
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/md_var.h
--- a/head/sys/i386/include/md_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/md_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/include/md_var.h 235622 2012-05-18 18:55:58Z iwasaki $
*/
#ifndef _MACHINE_MD_VAR_H_
@@ -91,6 +91,7 @@
void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
+void initializecpu(void);
void enable_sse(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
void i686_pagezero(void *addr);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/npx.h
--- a/head/sys/i386/include/npx.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/npx.h Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* from: @(#)npx.h 5.3 (Berkeley) 1/18/91
- * $FreeBSD: head/sys/i386/include/npx.h 233044 2012-03-16 20:24:30Z tijl $
+ * $FreeBSD: head/sys/i386/include/npx.h 238678 2012-07-21 21:52:48Z kib $
*/
/*
@@ -55,7 +55,8 @@
void npxinit(void);
void npxsave(union savefpu *addr);
void npxsetregs(struct thread *td, union savefpu *addr);
-int npxtrap(void);
+int npxtrap_x87(void);
+int npxtrap_sse(void);
void npxuserinited(struct thread *);
struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/pcb.h
--- a/head/sys/i386/include/pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* from: @(#)pcb.h 5.10 (Berkeley) 5/12/91
- * $FreeBSD$
+ * $FreeBSD: head/sys/i386/include/pcb.h 237027 2012-06-13 21:03:01Z jkim $
*/
#ifndef _I386_PCB_H_
@@ -45,7 +45,10 @@
#include <machine/npx.h>
struct pcb {
+ int pcb_cr0;
+ int pcb_cr2;
int pcb_cr3;
+ int pcb_cr4;
int pcb_edi;
int pcb_esi;
int pcb_ebp;
@@ -71,20 +74,30 @@
#define PCB_KERNNPX 0x40 /* kernel uses npx */
caddr_t pcb_onfault; /* copyin/out fault recovery */
+ int pcb_ds;
+ int pcb_es;
+ int pcb_fs;
int pcb_gs;
+ int pcb_ss;
struct segment_descriptor pcb_fsd;
struct segment_descriptor pcb_gsd;
struct pcb_ext *pcb_ext; /* optional pcb extension */
int pcb_psl; /* process status long */
u_long pcb_vm86[2]; /* vm86bios scratch space */
union savefpu *pcb_save;
+
+ struct region_descriptor pcb_gdt;
+ struct region_descriptor pcb_idt;
+ uint16_t pcb_ldt;
+ uint16_t pcb_tr;
};
#ifdef _KERNEL
struct trapframe;
void makectx(struct trapframe *, struct pcb *);
-void savectx(struct pcb *);
+int savectx(struct pcb *) __returns_twice;
+void resumectx(struct pcb *) __fastcall;
#endif
#endif /* _I386_PCB_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/pmap.h
--- a/head/sys/i386/include/pmap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/pmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
- * $FreeBSD: head/sys/i386/include/pmap.h 222813 2011-06-07 08:46:13Z attilio $
+ * $FreeBSD: head/sys/i386/include/pmap.h 237168 2012-06-16 18:56:19Z alc $
*/
#ifndef _MACHINE_PMAP_H_
@@ -481,7 +481,7 @@
pmap_t pc_pmap;
TAILQ_ENTRY(pv_chunk) pc_list;
uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */
- uint32_t pc_spare[2];
+ TAILQ_ENTRY(pv_chunk) pc_lru;
struct pv_entry pc_pventry[_NPCPV];
};
@@ -498,6 +498,7 @@
extern vm_offset_t virtual_end;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/smp.h
--- a/head/sys/i386/include/smp.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/smp.h Wed Jul 25 16:40:53 2012 +0300
@@ -6,7 +6,7 @@
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
*
- * $FreeBSD: head/sys/i386/include/smp.h 222853 2011-06-08 08:12:15Z avg $
+ * $FreeBSD: head/sys/i386/include/smp.h 236938 2012-06-12 00:14:54Z iwasaki $
*
*/
@@ -53,13 +53,18 @@
IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
+ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
IDTVEC(rendezvous), /* handle CPU rendezvous */
IDTVEC(lazypmap); /* handle lazy pmap release */
/* functions in mp_machdep.c */
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
+#ifndef XEN
+void cpususpend_handler(void);
+#endif
void init_secondary(void);
+void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
#ifndef XEN
void ipi_bitmap_handler(struct trapframe frame);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/i386/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/i386/include/vdso.h 237433 2012-06-22 07:06:40Z kib $ */
+
+#include <x86/vdso.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/include/vmparam.h
--- a/head/sys/i386/include/vmparam.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/include/vmparam.h Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91
- * $FreeBSD: head/sys/i386/include/vmparam.h 228398 2011-12-10 18:42:00Z alc $
+ * $FreeBSD: head/sys/i386/include/vmparam.h 237435 2012-06-22 07:16:29Z kib $
*/
@@ -165,7 +165,8 @@
#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
-#define USRSTACK VM_MAXUSER_ADDRESS
+#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
+#define USRSTACK SHAREDPAGE
#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define VM_MIN_ADDRESS ((vm_offset_t)0)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/isa/npx.c
--- a/head/sys/i386/isa/npx.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/isa/npx.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 230426 2012-01-21 17:45:27Z kib $");
+__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 238678 2012-07-21 21:52:48Z kib $");
#include "opt_cpu.h"
#include "opt_isa.h"
@@ -99,15 +99,7 @@
#ifdef CPU_ENABLE_SSE
#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
-#endif
-#ifdef XEN
-#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
-#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
-#else
-#define start_emulating() __asm __volatile( \
- "smsw %%ax; orb %0,%%al; lmsw %%ax" \
- : : "n" (CR0_TS) : "ax")
-#define stop_emulating() __asm __volatile("clts")
+#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
#endif
#else /* !(__GNUCLIKE_ASM && !lint) */
@@ -122,12 +114,19 @@
#ifdef CPU_ENABLE_SSE
void fxsave(caddr_t addr);
void fxrstor(caddr_t addr);
+void stmxcsr(u_int csr);
#endif
-void start_emulating(void);
-void stop_emulating(void);
#endif /* __GNUCLIKE_ASM && !lint */
+#ifdef XEN
+#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
+#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
+#else
+#define start_emulating() load_cr0(rcr0() | CR0_TS)
+#define stop_emulating() clts()
+#endif
+
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
(cpu_fxsr ? \
@@ -584,29 +583,30 @@
};
/*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ * Read the FP status and control words, then generate si_code value
+ * for SIGFPE. The error code chosen will be one of the
+ * FPE_... macros. It will be sent as the second argument to old
+ * BSD-style signal handlers and as "siginfo_t->si_code" (second
+ * argument) to SA_SIGINFO signal handlers.
*
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now
- * depend on longjmp() restoring a usable state. Restoring the state
- * or examining it might fail if we didn't clear exceptions.
+ * Some time ago, we cleared the x87 exceptions with FNCLEX there.
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
+ * usermode code which understands the FPU hardware enough to enable
+ * the exceptions, can also handle clearing the exception state in the
+ * handler. The only consequence of not clearing the exception is the
+ * rethrow of the SIGFPE on return from the signal handler and
+ * reexecution of the corresponding instruction.
*
- * The error code chosen will be one of the FPE_... macros. It will be
- * sent as the second argument to old BSD-style signal handlers and as
- * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
- *
- * XXX the FP state is not preserved across signal handlers. So signal
- * handlers cannot afford to do FP unless they preserve the state or
- * longjmp() out. Both preserving the state and longjmp()ing may be
- * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable
- * solution for signals other than SIGFPE.
+ * For XMM traps, the exceptions were never cleared.
*/
int
-npxtrap()
+npxtrap_x87(void)
{
u_short control, status;
if (!hw_float) {
- printf("npxtrap: fpcurthread = %p, curthread = %p, hw_float = %d\n",
+ printf(
+ "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
PCPU_GET(fpcurthread), curthread, hw_float);
panic("npxtrap from nowhere");
}
@@ -624,13 +624,32 @@
fnstcw(&control);
fnstsw(&status);
}
-
- if (PCPU_GET(fpcurthread) == curthread)
- fnclex();
critical_exit();
return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
+#ifdef CPU_ENABLE_SSE
+int
+npxtrap_sse(void)
+{
+ u_int mxcsr;
+
+ if (!hw_float) {
+ printf(
+ "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
+ PCPU_GET(fpcurthread), curthread, hw_float);
+ panic("npxtrap from nowhere");
+ }
+ critical_enter();
+ if (PCPU_GET(fpcurthread) != curthread)
+ mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
+ else
+ stmxcsr(&mxcsr);
+ critical_exit();
+ return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+#endif
+
/*
* Implement device not available (DNA) exception
*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/linux/linux.h
--- a/head/sys/i386/linux/linux.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/linux/linux.h Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/i386/linux/linux.h 230132 2012-01-15 13:23:18Z uqs $
+ * $FreeBSD: head/sys/i386/linux/linux.h 235063 2012-05-05 19:42:38Z netchild $
*/
#ifndef _I386_LINUX_H_
@@ -42,6 +42,7 @@
#define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name)
#define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)td->td_proc->p_pid
#define LMSG(fmt) "linux(%ld): "fmt"\n", (long)td->td_proc->p_pid
+#define LINUX_DTRACE linuxulator
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_LINUX);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/linux/linux_dummy.c
--- a/head/sys/i386/linux/linux_dummy.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/linux/linux_dummy.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,16 +27,25 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/linux/linux_dummy.c 234352 2012-04-16 21:22:02Z jkim $");
+__FBSDID("$FreeBSD: head/sys/i386/linux/linux_dummy.c 235063 2012-05-05 19:42:38Z netchild $");
+
+#include "opt_compat.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
+#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_util.h>
+/* DTrace init */
+LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
+
DUMMY(stime);
DUMMY(fstat);
DUMMY(olduname);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/i386/xen/pmap.c
--- a/head/sys/i386/xen/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/i386/xen/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -75,7 +75,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 229007 2011-12-30 18:16:15Z alc $");
+__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 236534 2012-06-04 03:51:08Z alc $");
/*
* Manages physical address maps.
@@ -179,7 +179,6 @@
#define PMAP_INLINE
#endif
-#define PV_STATS
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
@@ -230,6 +229,7 @@
/*
* Data for the pv entry allocation mechanism
*/
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int shpgperproc = PMAP_SHPGPERPROC;
@@ -277,8 +277,9 @@
"Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;
+static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
@@ -1914,6 +1915,7 @@
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
+CTASSERT(_NPCPV == 336);
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
@@ -1927,7 +1929,7 @@
#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
#define PC_FREE10 0x0000fffful /* Free values for index 10 */
-static uint32_t pc_freemask[11] = {
+static const uint32_t pc_freemask[_NPCM] = {
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
@@ -1958,74 +1960,140 @@
"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
- "Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
- "Current number times pmap_collect called on active queue");
#endif
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
- * another pv entry chunk. This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
*/
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
{
+ struct pch newtail;
+ struct pv_chunk *pc;
pmap_t pmap;
pt_entry_t *pte, tpte;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
vm_offset_t va;
- vm_page_t m, free;
-
+ vm_page_t free, m, m_pc;
+ uint32_t inuse;
+ int bit, field, freed;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ pmap = NULL;
+ free = m_pc = NULL;
+ TAILQ_INIT(&newtail);
sched_pin();
- TAILQ_FOREACH(m, &vpq->pl, pageq) {
- if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
- continue;
- TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
+ free == NULL)) {
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ if (pmap != pc->pc_pmap) {
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap)
PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+ else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+ pmap = NULL;
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
continue;
- pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pmap, va);
- tpte = pte_load_clear(pte);
- KASSERT((tpte & PG_W) == 0,
- ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- free = NULL;
- pmap_unuse_pt(pmap, va, &free);
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- free_pv_entry(pmap, pv);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
+ }
}
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
+
+ /*
+ * Destroy every non-wired, 4 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = bsfl(inuse);
+ pv = &pc->pc_pventry[field * 32 + bit];
+ va = pv->pv_va;
+ pte = pmap_pte_quick(pmap, va);
+ if ((*pte & PG_W) != 0)
+ continue;
+ tpte = pte_load_clear(pte);
+ if ((tpte & PG_G) != 0)
+ pmap_invalidate_page(pmap, va);
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if ((tpte & PG_A) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pc->pc_map[field] |= 1UL << bit;
+ pmap_unuse_pt(pmap, va, &free);
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+ continue;
+ }
+ /* Every freed mapping is for a 4 KB page. */
+ pmap->pm_stats.resident_count -= freed;
+ PV_STAT(pv_entry_frees += freed);
+ PV_STAT(pv_entry_spare += freed);
+ pv_entry_count -= freed;
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ for (field = 0; field < _NPCM; field++)
+ if (pc->pc_map[field] != pc_freemask[field]) {
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+ /*
+ * One freed pv entry in locked_pmap is
+ * sufficient.
+ */
+ if (pmap == locked_pmap)
+ goto out;
+ break;
+ }
+ if (field == _NPCM) {
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+ pmap_qremove((vm_offset_t)pc, 1);
+ pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ break;
+ }
}
+out:
sched_unpin();
+ TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
+ m_pc = free;
+ free = m_pc->right;
+ /* Recycle a freed page table page. */
+ m_pc->wire_count = 1;
+ atomic_add_int(&cnt.v_wire_count, 1);
+ }
+ pmap_free_zero_pages(free);
+ return (m_pc);
}
-
/*
* free the pv_entry back to the free list
*/
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
- vm_page_t m;
struct pv_chunk *pc;
int idx, field, bit;
@@ -2039,13 +2107,30 @@
field = idx / 32;
bit = idx % 32;
pc->pc_map[field] |= 1ul << bit;
- /* move to head of list */
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
for (idx = 0; idx < _NPCM; idx++)
if (pc->pc_map[idx] != pc_freemask[idx]) {
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ /*
+ * 98% of the time, pc is already at the head of the
+ * list. If it isn't already, move it to the head.
+ */
+ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
+ pc)) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ }
return;
}
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
@@ -2062,11 +2147,10 @@
* when needed.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, int try)
+get_pv_entry(pmap_t pmap, boolean_t try)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
- struct vpgqueues *pq;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@@ -2081,7 +2165,6 @@
printf("Approaching the limit on PV entries, consider "
"increasing either the vm.pmap.shpgperproc or the "
"vm.pmap.pv_entry_max tunable.\n");
- pq = NULL;
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
@@ -2111,29 +2194,16 @@
* queues lock. If "pv_vafree" is currently non-empty, it will
* remain non-empty until pmap_ptelist_alloc() completes.
*/
- if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
- &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
+ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
if (try) {
pv_entry_count--;
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
- /*
- * Reclaim pv entries: At first, destroy mappings to
- * inactive pages. After that, if a pv chunk entry
- * is still needed, destroy mappings to active pages.
- */
- if (pq == NULL) {
- PV_STAT(pmap_collect_inactive++);
- pq = &vm_page_queues[PQ_INACTIVE];
- } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
- PV_STAT(pmap_collect_active++);
- pq = &vm_page_queues[PQ_ACTIVE];
- } else
- panic("get_pv_entry: increase vm.pmap.shpgperproc");
- pmap_collect(pmap, pq);
- goto retry;
+ m = pmap_pv_reclaim(pmap);
+ if (m == NULL)
+ goto retry;
}
PV_STAT(pc_chunk_count++);
PV_STAT(pc_chunk_allocs++);
@@ -2145,6 +2215,7 @@
pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
for (field = 1; field < _NPCM; field++)
pc->pc_map[field] = pc_freemask[field];
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(pv_entry_spare += _NPCPV - 1);
@@ -3470,7 +3541,7 @@
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
for (field = 0; field < _NPCM; field++) {
- inuse = (~(pc->pc_map[field])) & pc_freemask[field];
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
bit = bsfl(inuse);
bitmask = 1UL << bit;
@@ -3531,15 +3602,8 @@
}
PT_UPDATES_FLUSH();
if (allfree) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- vm_page_unwire(m, 0);
- vm_page_free(m);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ free_pv_chunk(pc);
}
}
PT_UPDATES_FLUSH();
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/acpica/acpi_wakeup.c
--- a/head/sys/ia64/acpica/acpi_wakeup.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/acpica/acpi_wakeup.c Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/acpica/acpi_wakeup.c 236409 2012-06-01 17:07:52Z jkim $
*/
#include <sys/param.h>
@@ -39,6 +39,13 @@
return (0);
}
+int
+acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
+ int intr_enabled)
+{
+ return (0);
+}
+
void
acpi_install_wakeup_handler(struct acpi_softc *sc)
{
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/busdma_machdep.c
--- a/head/sys/ia64/ia64/busdma_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/ia64/busdma_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ia64/ia64/busdma_machdep.c 232356 2012-03-01 19:58:34Z jhb $");
+__FBSDID("$FreeBSD: head/sys/ia64/ia64/busdma_machdep.c 238184 2012-07-07 00:25:17Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -262,7 +262,7 @@
atomic_add_int(&parent->ref_count, 1);
}
- if (newtag->lowaddr < ptoa(Maxmem) && (flags & BUS_DMA_ALLOCNOW) != 0) {
+ if (newtag->lowaddr < paddr_max && (flags & BUS_DMA_ALLOCNOW) != 0) {
/* Must bounce */
if (ptoa(total_bpages) < maxsize) {
@@ -340,7 +340,7 @@
* exclusion region, a data alignment that is stricter than 1, and/or
* an active address boundary.
*/
- if (dmat->lowaddr < ptoa(Maxmem)) {
+ if (dmat->lowaddr < paddr_max) {
/* Must bounce */
int maxpages;
@@ -356,7 +356,7 @@
* Attempt to add pages to our pool on a per-instance
* basis up to a sane limit.
*/
- maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->lowaddr));
+ maxpages = MIN(MAX_BPAGES, atop(paddr_max - dmat->lowaddr));
if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0
|| (dmat->map_count > 0 && total_bpages < maxpages)) {
int pages;
@@ -438,7 +438,7 @@
*/
if ((dmat->maxsize <= PAGE_SIZE) &&
(dmat->alignment < dmat->maxsize) &&
- dmat->lowaddr >= ptoa(Maxmem)) {
+ dmat->lowaddr >= paddr_max) {
*vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
} else {
/*
@@ -473,7 +473,7 @@
panic("bus_dmamem_free: Invalid map freed\n");
if ((dmat->maxsize <= PAGE_SIZE) &&
(dmat->alignment < dmat->maxsize) &&
- dmat->lowaddr >= ptoa(Maxmem))
+ dmat->lowaddr >= paddr_max)
free(vaddr, M_DEVBUF);
else {
contigfree(vaddr, dmat->maxsize, M_DEVBUF);
@@ -506,7 +506,7 @@
else
pmap = NULL;
- if ((dmat->lowaddr < ptoa(Maxmem) || dmat->boundary > 0 ||
+ if ((dmat->lowaddr < paddr_max || dmat->boundary > 0 ||
dmat->alignment > 1) && map != &nobounce_dmamap &&
map->pagesneeded == 0) {
vm_offset_t vendaddr;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/machdep.c
--- a/head/sys/ia64/ia64/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/ia64/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ia64/ia64/machdep.c 232250 2012-02-28 13:19:34Z gavin $");
+__FBSDID("$FreeBSD: head/sys/ia64/ia64/machdep.c 238257 2012-07-08 18:00:22Z marcel $");
#include "opt_compat.h"
#include "opt_ddb.h"
@@ -152,22 +152,11 @@
extern vm_offset_t ksym_start, ksym_end;
#endif
-
struct msgbuf *msgbufp = NULL;
/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(void) = NULL;
-long Maxmem = 0;
-long realmem = 0;
-
-#define PHYSMAP_SIZE (2 * VM_PHYSSEG_MAX)
-
-vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
-
-/* must be 2 less so 0 0 can signal end of chunks */
-#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
-
struct kva_md_info kmi;
#define Mhz 1000000L
@@ -270,25 +259,8 @@
#ifdef PERFMON
perfmon_init();
#endif
- printf("real memory = %ld (%ld MB)\n", ia64_ptob(Maxmem),
- ia64_ptob(Maxmem) / 1048576);
- realmem = Maxmem;
-
- /*
- * Display any holes after the first chunk of extended memory.
- */
- if (bootverbose) {
- int indx;
-
- printf("Physical memory chunk(s):\n");
- for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
- long size1 = phys_avail[indx + 1] - phys_avail[indx];
-
- printf("0x%08lx - 0x%08lx, %ld bytes (%ld pages)\n",
- phys_avail[indx], phys_avail[indx + 1] - 1, size1,
- size1 >> PAGE_SHIFT);
- }
- }
+ printf("real memory = %ld (%ld MB)\n", ptoa(realmem),
+ ptoa(realmem) / 1048576);
vm_ksubmap_init(&kmi);
@@ -534,6 +506,14 @@
}
void
+cpu_pcpu_setup(struct pcpu *pc, u_int acpi_id, u_int sapic_id)
+{
+
+ pc->pc_acpi_id = acpi_id;
+ pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
+}
+
+void
spinlock_enter(void)
{
struct thread *td;
@@ -700,43 +680,86 @@
ia64_init(void)
{
struct ia64_init_return ret;
- int phys_avail_cnt;
- vm_offset_t kernstart, kernend;
- vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1;
+ struct efi_md *md;
+ pt_entry_t *pbvm_pgtbl_ent, *pbvm_pgtbl_lim;
char *p;
- struct efi_md *md;
+ vm_size_t mdlen;
int metadata_missing;
- /* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */
+ /*
+ * NO OUTPUT ALLOWED UNTIL FURTHER NOTICE.
+ */
- /*
- * TODO: Disable interrupts, floating point etc.
- * Maybe flush cache and tlb
- */
ia64_set_fpsr(IA64_FPSR_DEFAULT);
/*
- * TODO: Get critical system information (if possible, from the
- * information provided by the boot program).
+ * Region 6 is direct mapped UC and region 7 is direct mapped
+ * WC. The details of this is controlled by the Alt {I,D}TLB
+ * handlers. Here we just make sure that they have the largest
+ * possible page size to minimise TLB usage.
*/
+ ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (PAGE_SHIFT << 2));
+ ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (PAGE_SHIFT << 2));
+ ia64_srlz_d();
+
+ /* Initialize/setup physical memory datastructures */
+ ia64_physmem_init();
/*
- * Look for the I/O ports first - we need them for console
- * probing.
+ * Process the memory map. This gives us the PAL locations,
+ * the I/O port base address, the available memory regions
+ * for initializing the physical memory map.
*/
for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) {
+ mdlen = md->md_pages * EFI_PAGE_SIZE;
switch (md->md_type) {
case EFI_MD_TYPE_IOPORT:
ia64_port_base = (uintptr_t)pmap_mapdev(md->md_phys,
- md->md_pages * EFI_PAGE_SIZE);
+ mdlen);
break;
case EFI_MD_TYPE_PALCODE:
- ia64_pal_size = md->md_pages * EFI_PAGE_SIZE;
ia64_pal_base = md->md_phys;
+ ia64_pal_size = mdlen;
+ /*FALLTHROUGH*/
+ case EFI_MD_TYPE_BAD:
+ case EFI_MD_TYPE_FIRMWARE:
+ case EFI_MD_TYPE_RECLAIM:
+ case EFI_MD_TYPE_RT_CODE:
+ case EFI_MD_TYPE_RT_DATA:
+ /* Don't use these memory regions. */
+ ia64_physmem_track(md->md_phys, mdlen);
+ break;
+ case EFI_MD_TYPE_BS_CODE:
+ case EFI_MD_TYPE_BS_DATA:
+ case EFI_MD_TYPE_CODE:
+ case EFI_MD_TYPE_DATA:
+ case EFI_MD_TYPE_FREE:
+ /* These are ok to use. */
+ ia64_physmem_add(md->md_phys, mdlen);
break;
}
}
+ /*
+ * Remove the PBVM and its page table from phys_avail. The loader
+ * passes the physical address of the page table to us. The virtual
+ * address of the page table is fixed.
+ * Track the PBVM limit for later use.
+ */
+ ia64_physmem_delete(bootinfo->bi_pbvm_pgtbl, bootinfo->bi_pbvm_pgtblsz);
+ pbvm_pgtbl_ent = (void *)IA64_PBVM_PGTBL;
+ pbvm_pgtbl_lim = (void *)(IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz);
+ while (pbvm_pgtbl_ent < pbvm_pgtbl_lim) {
+ if ((*pbvm_pgtbl_ent & PTE_PRESENT) == 0)
+ break;
+ ia64_physmem_delete(*pbvm_pgtbl_ent & PTE_PPN_MASK,
+ IA64_PBVM_PAGE_SIZE);
+ pbvm_pgtbl_ent++;
+ }
+
+ /* Finalize physical memory datastructures */
+ ia64_physmem_fini();
+
metadata_missing = 0;
if (bootinfo->bi_modulep)
preload_metadata = (caddr_t)bootinfo->bi_modulep;
@@ -757,31 +780,6 @@
bootverbose = 1;
/*
- * Find the beginning and end of the kernel.
- */
- kernstart = trunc_page(kernel_text);
-#ifdef DDB
- ksym_start = bootinfo->bi_symtab;
- ksym_end = bootinfo->bi_esymtab;
- kernend = (vm_offset_t)round_page(ksym_end);
-#else
- kernend = (vm_offset_t)round_page(_end);
-#endif
- /* But if the bootstrap tells us otherwise, believe it! */
- if (bootinfo->bi_kernend)
- kernend = round_page(bootinfo->bi_kernend);
-
- /*
- * Region 6 is direct mapped UC and region 7 is direct mapped
- * WC. The details of this is controlled by the Alt {I,D}TLB
- * handlers. Here we just make sure that they have the largest
- * possible page size to minimise TLB usage.
- */
- ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (PAGE_SHIFT << 2));
- ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (PAGE_SHIFT << 2));
- ia64_srlz_d();
-
- /*
* Wire things up so we can call the firmware.
*/
map_pal_code();
@@ -800,9 +798,8 @@
pcpup = &pcpu0;
ia64_set_k4((u_int64_t)pcpup);
pcpu_init(pcpup, 0, sizeof(pcpu0));
- dpcpu_init((void *)kernend, 0);
- PCPU_SET(md.lid, ia64_get_lid());
- kernend += DPCPU_SIZE;
+ dpcpu_init(ia64_physmem_alloc(DPCPU_SIZE, PAGE_SIZE), 0);
+ cpu_pcpu_setup(pcpup, ~0U, ia64_get_lid());
PCPU_SET(curthread, &thread0);
/*
@@ -828,105 +825,20 @@
freeenv(p);
}
- kernstartpfn = atop(IA64_RR_MASK(kernstart));
- kernendpfn = atop(IA64_RR_MASK(kernend));
-
- /*
- * Size the memory regions and load phys_avail[] with the results.
- */
-
- /*
- * Find out how much memory is available, by looking at
- * the memory descriptors.
- */
-
-#ifdef DEBUG_MD
- printf("Memory descriptor count: %d\n", mdcount);
-#endif
-
- phys_avail_cnt = 0;
- for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) {
-#ifdef DEBUG_MD
- printf("MD %p: type %d pa 0x%lx cnt 0x%lx\n", md,
- md->md_type, md->md_phys, md->md_pages);
-#endif
-
- pfn0 = ia64_btop(round_page(md->md_phys));
- pfn1 = ia64_btop(trunc_page(md->md_phys + md->md_pages * 4096));
- if (pfn1 <= pfn0)
- continue;
-
- if (md->md_type != EFI_MD_TYPE_FREE)
- continue;
-
- /*
- * We have a memory descriptor that describes conventional
- * memory that is for general use. We must determine if the
- * loader has put the kernel in this region.
- */
- physmem += (pfn1 - pfn0);
- if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) {
- /*
- * Must compute the location of the kernel
- * within the segment.
- */
-#ifdef DEBUG_MD
- printf("Descriptor %p contains kernel\n", mp);
-#endif
- if (pfn0 < kernstartpfn) {
- /*
- * There is a chunk before the kernel.
- */
-#ifdef DEBUG_MD
- printf("Loading chunk before kernel: "
- "0x%lx / 0x%lx\n", pfn0, kernstartpfn);
-#endif
- phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
- phys_avail[phys_avail_cnt+1] = ia64_ptob(kernstartpfn);
- phys_avail_cnt += 2;
- }
- if (kernendpfn < pfn1) {
- /*
- * There is a chunk after the kernel.
- */
-#ifdef DEBUG_MD
- printf("Loading chunk after kernel: "
- "0x%lx / 0x%lx\n", kernendpfn, pfn1);
-#endif
- phys_avail[phys_avail_cnt] = ia64_ptob(kernendpfn);
- phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
- phys_avail_cnt += 2;
- }
- } else {
- /*
- * Just load this cluster as one chunk.
- */
-#ifdef DEBUG_MD
- printf("Loading descriptor %d: 0x%lx / 0x%lx\n", i,
- pfn0, pfn1);
-#endif
- phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
- phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
- phys_avail_cnt += 2;
-
- }
- }
- phys_avail[phys_avail_cnt] = 0;
-
- Maxmem = physmem;
init_param2(physmem);
/*
* Initialize error message buffer (at end of core).
*/
- msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
+ msgbufp = ia64_physmem_alloc(msgbufsize, PAGE_SIZE);
msgbufinit(msgbufp, msgbufsize);
proc_linkup0(&proc0, &thread0);
/*
* Init mapping for kernel stack for proc 0
*/
- thread0.td_kstack = pmap_steal_memory(KSTACK_PAGES * PAGE_SIZE);
+ p = ia64_physmem_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
+ thread0.td_kstack = (uintptr_t)p;
thread0.td_kstack_pages = KSTACK_PAGES;
mutex_init();
@@ -952,6 +864,11 @@
/*
* Initialize debuggers, and break into them if appropriate.
*/
+#ifdef DDB
+ ksym_start = bootinfo->bi_symtab;
+ ksym_end = bootinfo->bi_esymtab;
+#endif
+
kdb_init();
#ifdef KDB
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/mp_machdep.c
--- a/head/sys/ia64/ia64/mp_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/ia64/mp_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ia64/ia64/mp_machdep.c 223758 2011-07-04 12:04:52Z attilio $");
+__FBSDID("$FreeBSD: head/sys/ia64/ia64/mp_machdep.c 238257 2012-07-08 18:00:22Z marcel $");
#include "opt_kstack_pages.h"
@@ -309,9 +309,8 @@
} else
pc = pcpup;
- pc->pc_acpi_id = acpi_id;
- pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
-
+ cpu_pcpu_setup(pc, acpi_id, sapic_id);
+
CPU_SET(pc->pc_cpuid, &all_cpus);
}
@@ -466,6 +465,7 @@
*/
ia64_bind_intr();
}
+SYSINIT(start_aps, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, cpu_mp_unleash, NULL);
/*
* send an IPI to a set of cpus.
@@ -522,5 +522,3 @@
ia64_mf_a();
CTR3(KTR_SMP, "ipi_send(%p, %d): cpuid=%d", cpu, xiv, PCPU_GET(cpuid));
}
-
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/nexus.c
--- a/head/sys/ia64/ia64/nexus.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/ia64/nexus.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/ia64/ia64/nexus.c 224184 2011-07-18 14:04:37Z jhb $
+ * $FreeBSD: head/sys/ia64/ia64/nexus.c 235041 2012-05-04 23:16:29Z marcel $
*/
/*
@@ -65,9 +65,6 @@
#include <dev/acpica/acpivar.h>
-#include <isa/isareg.h>
-#include <sys/rtprio.h>
-
#include "clock_if.h"
static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device");
@@ -191,12 +188,6 @@
nexus_attach(device_t dev)
{
- /*
- * Mask the legacy PICs - we will use the I/O SAPIC for interrupt.
- */
- outb(IO_ICU1+1, 0xff);
- outb(IO_ICU2+1, 0xff);
-
if (acpi_identify() == 0)
BUS_ADD_CHILD(dev, 10, "acpi", 0);
clock_register(dev, 1000);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/physmem.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/ia64/ia64/physmem.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,258 @@
+/*-
+ * Copyright (c) 2012 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/ia64/ia64/physmem.c 238190 2012-07-07 05:17:43Z marcel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+
+static u_int phys_avail_segs;
+
+vm_paddr_t phys_avail[2 * VM_PHYSSEG_MAX + 2];
+
+vm_paddr_t paddr_max;
+
+long realmem;
+
+static u_int
+ia64_physmem_find(vm_paddr_t base, vm_paddr_t lim)
+{
+ u_int idx;
+
+ for (idx = 0; phys_avail[idx + 1] != 0; idx += 2) {
+ if (phys_avail[idx] >= lim ||
+ phys_avail[idx + 1] > base)
+ break;
+ }
+ return (idx);
+}
+
+static int
+ia64_physmem_insert(u_int idx, vm_paddr_t base, vm_paddr_t lim)
+{
+ u_int ridx;
+
+ if (phys_avail_segs == VM_PHYSSEG_MAX)
+ return (ENOMEM);
+
+ ridx = phys_avail_segs * 2;
+ while (idx < ridx) {
+ phys_avail[ridx + 1] = phys_avail[ridx - 1];
+ phys_avail[ridx] = phys_avail[ridx - 2];
+ ridx -= 2;
+ }
+ phys_avail[idx] = base;
+ phys_avail[idx + 1] = lim;
+ phys_avail_segs++;
+ return (0);
+}
+
+static int
+ia64_physmem_remove(u_int idx)
+{
+
+ if (phys_avail_segs == 0)
+ return (ENOENT);
+ do {
+ phys_avail[idx] = phys_avail[idx + 2];
+ phys_avail[idx + 1] = phys_avail[idx + 3];
+ idx += 2;
+ } while (phys_avail[idx + 1] != 0);
+ phys_avail_segs--;
+ return (0);
+}
+
+int
+ia64_physmem_add(vm_paddr_t base, vm_size_t len)
+{
+ vm_paddr_t lim;
+ u_int idx;
+
+ realmem += len;
+
+ lim = base + len;
+ idx = ia64_physmem_find(base, lim);
+ if (phys_avail[idx] == lim) {
+ phys_avail[idx] = base;
+ return (0);
+ }
+ if (idx > 0 && phys_avail[idx - 1] == base) {
+ phys_avail[idx - 1] = lim;
+ return (0);
+ }
+ return (ia64_physmem_insert(idx, base, lim));
+}
+
+int
+ia64_physmem_delete(vm_paddr_t base, vm_size_t len)
+{
+ vm_paddr_t lim;
+ u_int idx;
+
+ lim = base + len;
+ idx = ia64_physmem_find(base, lim);
+ if (phys_avail[idx] >= lim || phys_avail[idx + 1] == 0)
+ return (ENOENT);
+ if (phys_avail[idx] < base && phys_avail[idx + 1] > lim) {
+ len = phys_avail[idx + 1] - lim;
+ phys_avail[idx + 1] = base;
+ base = lim;
+ lim = base + len;
+ return (ia64_physmem_insert(idx + 2, base, lim));
+ } else {
+ if (phys_avail[idx] == base)
+ phys_avail[idx] = lim;
+ if (phys_avail[idx + 1] == lim)
+ phys_avail[idx + 1] = base;
+ if (phys_avail[idx] >= phys_avail[idx + 1])
+ return (ia64_physmem_remove(idx));
+ }
+ return (0);
+}
+
+int
+ia64_physmem_fini(void)
+{
+ vm_paddr_t base, lim, size;
+ u_int idx;
+
+ idx = 0;
+ while (phys_avail[idx + 1] != 0) {
+ base = round_page(phys_avail[idx]);
+ lim = trunc_page(phys_avail[idx + 1]);
+ if (base < lim) {
+ phys_avail[idx] = base;
+ phys_avail[idx + 1] = lim;
+ size = lim - base;
+ physmem += atop(size);
+ paddr_max = lim;
+ idx += 2;
+ } else
+ ia64_physmem_remove(idx);
+ }
+
+ /*
+ * Round realmem to a multiple of 128MB. Hopefully that compensates
+ * for any loss of DRAM that isn't accounted for in the memory map.
+ * I'm thinking legacy BIOS or VGA here. In any case, it's ok if
+ * we got it wrong, because we don't actually use realmem. It's
+ * just for show...
+ */
+ size = 1U << 27;
+ realmem = (realmem + size - 1) & ~(size - 1);
+ realmem = atop(realmem);
+ return (0);
+}
+
+int
+ia64_physmem_init(void)
+{
+
+ /* Nothing to do just yet. */
+ return (0);
+}
+
+int
+ia64_physmem_track(vm_paddr_t base, vm_size_t len)
+{
+
+ realmem += len;
+ return (0);
+}
+
+void *
+ia64_physmem_alloc(vm_size_t len, vm_size_t align)
+{
+ vm_paddr_t base, lim, pa;
+ void *ptr;
+ u_int idx;
+
+ if (phys_avail_segs == 0)
+ return (NULL);
+
+ len = round_page(len);
+
+ /*
+ * Try and allocate with least effort.
+ */
+ idx = phys_avail_segs * 2;
+ while (idx > 0) {
+ idx -= 2;
+ base = phys_avail[idx];
+ lim = phys_avail[idx + 1];
+
+ if (lim - base < len)
+ continue;
+
+ /* First try from the end. */
+ pa = lim - len;
+ if ((pa & (align - 1)) == 0) {
+ if (pa == base)
+ ia64_physmem_remove(idx);
+ else
+ phys_avail[idx + 1] = pa;
+ goto gotit;
+ }
+
+ /* Try from the start next. */
+ pa = base;
+ if ((pa & (align - 1)) == 0) {
+ if (pa + len == lim)
+ ia64_physmem_remove(idx);
+ else
+ phys_avail[idx] += len;
+ goto gotit;
+ }
+ }
+
+ /*
+ * Find a good segment and split it up.
+ */
+ idx = phys_avail_segs * 2;
+ while (idx > 0) {
+ idx -= 2;
+ base = phys_avail[idx];
+ lim = phys_avail[idx + 1];
+
+ pa = (base + align - 1) & ~(align - 1);
+ if (pa + len <= lim) {
+ ia64_physmem_delete(pa, len);
+ goto gotit;
+ }
+ }
+
+ /* Out of luck. */
+ return (NULL);
+
+ gotit:
+ ptr = (void *)IA64_PHYS_TO_RR7(pa);
+ bzero(ptr, len);
+ return (ptr);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/ia64/pmap.c
--- a/head/sys/ia64/ia64/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/ia64/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -46,7 +46,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 238190 2012-07-07 05:17:43Z marcel $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -243,36 +243,6 @@
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
-vm_offset_t
-pmap_steal_memory(vm_size_t size)
-{
- vm_size_t bank_size;
- vm_offset_t pa, va;
-
- size = round_page(size);
-
- bank_size = phys_avail[1] - phys_avail[0];
- while (size > bank_size) {
- int i;
- for (i = 0; phys_avail[i+2]; i+= 2) {
- phys_avail[i] = phys_avail[i+2];
- phys_avail[i+1] = phys_avail[i+3];
- }
- phys_avail[i] = 0;
- phys_avail[i+1] = 0;
- if (!phys_avail[0])
- panic("pmap_steal_memory: out of memory");
- bank_size = phys_avail[1] - phys_avail[0];
- }
-
- pa = phys_avail[0];
- phys_avail[0] += size;
-
- va = IA64_PHYS_TO_RR7(pa);
- bzero((caddr_t) va, size);
- return va;
-}
-
static void
pmap_initialize_vhpt(vm_offset_t vhpt)
{
@@ -289,21 +259,23 @@
}
#ifdef SMP
-MALLOC_DECLARE(M_SMP);
-
vm_offset_t
pmap_alloc_vhpt(void)
{
vm_offset_t vhpt;
+ vm_page_t m;
vm_size_t size;
size = 1UL << pmap_vhpt_log2size;
- vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
- if (vhpt != 0) {
- vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
+ m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
+ VM_MEMATTR_DEFAULT);
+ if (m != NULL) {
+ vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
pmap_initialize_vhpt(vhpt);
+ return (vhpt);
}
- return (vhpt);
+ return (0);
}
#endif
@@ -316,7 +288,7 @@
struct ia64_pal_result res;
vm_offset_t base;
size_t size;
- int i, j, count, ridbits;
+ int i, ridbits;
/*
* Query the PAL Code to find the loop parameters for the
@@ -378,7 +350,7 @@
pmap_ridmax = (1 << ridbits);
pmap_ridmapsz = pmap_ridmax / 64;
- pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
+ pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
pmap_ridmap[0] |= 0xff;
pmap_rididx = 0;
pmap_ridcount = 8;
@@ -387,14 +359,10 @@
/*
* Allocate some memory for initial kernel 'page tables'.
*/
- ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
+ ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
nkpt = 0;
kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
- for (i = 0; phys_avail[i+2]; i+= 2)
- ;
- count = i+2;
-
/*
* Determine a valid (mappable) VHPT size.
*/
@@ -408,35 +376,18 @@
if (pmap_vhpt_log2size & 1)
pmap_vhpt_log2size--;
- base = 0;
size = 1UL << pmap_vhpt_log2size;
- for (i = 0; i < count; i += 2) {
- base = (phys_avail[i] + size - 1) & ~(size - 1);
- if (base + size <= phys_avail[i+1])
- break;
- }
- if (!phys_avail[i])
+ base = (uintptr_t)ia64_physmem_alloc(size, size);
+ if (base == 0)
panic("Unable to allocate VHPT");
- if (base != phys_avail[i]) {
- /* Split this region. */
- for (j = count; j > i; j -= 2) {
- phys_avail[j] = phys_avail[j-2];
- phys_avail[j+1] = phys_avail[j-2+1];
- }
- phys_avail[i+1] = base;
- phys_avail[i+2] = base + size;
- } else
- phys_avail[i] = base + size;
-
- base = IA64_PHYS_TO_RR7(base);
PCPU_SET(md.vhpt, base);
if (bootverbose)
printf("VHPT: address=%#lx, size=%#lx\n", base, size);
pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
- pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
- sizeof(struct ia64_bucket));
+ pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
+ sizeof(struct ia64_bucket), PAGE_SIZE);
for (i = 0; i < pmap_vhpt_nbuckets; i++) {
/* Stolen memory is zeroed. */
mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/_stdint.h
--- a/head/sys/ia64/include/_stdint.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/_stdint.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/include/_stdint.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__STDINT_H_
@@ -149,12 +149,6 @@
/* Limit of size_t. */
#define SIZE_MAX UINT64_MAX
-#ifndef WCHAR_MIN /* Also possibly defined in <wchar.h> */
-/* Limits of wchar_t. */
-#define WCHAR_MIN INT32_MIN
-#define WCHAR_MAX INT32_MAX
-#endif
-
/* Limits of wint_t. */
#define WINT_MIN INT32_MIN
#define WINT_MAX INT32_MAX
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/_types.h
--- a/head/sys/ia64/include/_types.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/_types.h Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*
* From: @(#)ansi.h 8.2 (Berkeley) 1/4/94
* From: @(#)types.h 8.3 (Berkeley) 1/5/94
- * $FreeBSD: head/sys/ia64/include/_types.h 228469 2011-12-13 13:38:03Z ed $
+ * $FreeBSD: head/sys/ia64/include/_types.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__TYPES_H_
@@ -96,6 +96,10 @@
typedef __uint64_t __vm_paddr_t;
typedef __uint64_t __vm_pindex_t;
typedef __uint64_t __vm_size_t;
+typedef int __wchar_t;
+
+#define __WCHAR_MIN __INT_MIN /* min value for a wchar_t */
+#define __WCHAR_MAX __INT_MAX /* max value for a wchar_t */
/*
* Unusual type definitions.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/elf.h
--- a/head/sys/ia64/include/elf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/elf.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/include/elf.h 237430 2012-06-22 06:38:31Z kib $
*/
#ifndef _MACHINE_ELF_H_
@@ -95,6 +95,7 @@
#define AT_NCPUS 19 /* Number of CPUs. */
#define AT_PAGESIZES 20 /* Pagesizes. */
#define AT_PAGESIZESLEN 21 /* Number of pagesizes. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
#define AT_STACKPROT 23 /* Initial stack protection. */
#define AT_COUNT 24 /* Count of defined aux entry types. */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/in_cksum.h
--- a/head/sys/ia64/include/in_cksum.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/in_cksum.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* from tahoe: in_cksum.c 1.2 86/01/05
* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
* from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/include/in_cksum.h 235941 2012-05-24 22:00:48Z bz $
*/
#ifndef _MACHINE_IN_CKSUM_H_
@@ -39,6 +39,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -65,9 +66,12 @@
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/md_var.h
--- a/head/sys/ia64/include/md_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/md_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/include/md_var.h 238257 2012-07-08 18:00:22Z marcel $
*/
#ifndef _MACHINE_MD_VAR_H_
@@ -61,6 +61,7 @@
#ifdef _KERNEL
struct _special;
+struct pcpu;
struct thread;
struct trapframe;
@@ -73,14 +74,14 @@
};
extern uint64_t ia64_lapic_addr;
-
-extern long Maxmem;
+extern vm_paddr_t paddr_max;
extern u_int busdma_swi_pending;
void *acpi_find_table(const char *sig);
void busdma_swi(void);
int copyout_regstack(struct thread *, uint64_t *, uint64_t *);
void cpu_mp_add(u_int, u_int, u_int);
+void cpu_pcpu_setup(struct pcpu *, u_int, u_int);
int do_ast(struct trapframe *);
void ia32_trap(int, struct trapframe *);
int ia64_count_cpus(void);
@@ -93,6 +94,12 @@
int ia64_highfp_save_ipi(void);
struct ia64_init_return ia64_init(void);
u_int ia64_itc_freq(void);
+int ia64_physmem_add(vm_paddr_t, vm_size_t);
+void *ia64_physmem_alloc(vm_size_t, vm_size_t);
+int ia64_physmem_delete(vm_paddr_t, vm_size_t);
+int ia64_physmem_fini(void);
+int ia64_physmem_init(void);
+int ia64_physmem_track(vm_paddr_t, vm_size_t);
void ia64_probe_sapics(void);
void ia64_sync_icache(vm_offset_t, vm_size_t);
void interrupt(struct trapframe *);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/param.h
--- a/head/sys/ia64/include/param.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/param.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/ia64/include/param.h 224217 2011-07-19 13:00:30Z attilio $ */
+/* $FreeBSD: head/sys/ia64/include/param.h 238184 2012-07-07 00:25:17Z marcel $ */
/* From: NetBSD: param.h,v 1.20 1997/09/19 13:52:53 leo Exp */
/*-
@@ -110,9 +110,6 @@
#define atop(x) ((unsigned long)(x) >> PAGE_SHIFT)
#define ptoa(x) ((unsigned long)(x) << PAGE_SHIFT)
-#define ia64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT)
-#define ia64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT)
-
#define pgtok(x) ((x) * (PAGE_SIZE / 1024))
#endif /* !_IA64_INCLUDE_PARAM_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/pcb.h
--- a/head/sys/ia64/include/pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/ia64/include/pcb.h 234785 2012-04-29 11:04:31Z dim $
*/
#ifndef _MACHINE_PCB_H_
@@ -65,10 +65,10 @@
void makectx(struct trapframe *, struct pcb *);
void restorectx(struct pcb *) __dead2;
-int swapctx(struct pcb *old, struct pcb *new);
+int swapctx(struct pcb *old, struct pcb *new) __returns_twice;
void ia32_restorectx(struct pcb *);
-void ia32_savectx(struct pcb *);
+void ia32_savectx(struct pcb *) __returns_twice;
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/pmap.h
--- a/head/sys/ia64/include/pmap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/ia64/include/pmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
* from: i386 pmap.h,v 1.54 1997/11/20 19:30:35 bde Exp
- * $FreeBSD: head/sys/ia64/include/pmap.h 223873 2011-07-08 16:30:54Z marcel $
+ * $FreeBSD: head/sys/ia64/include/pmap.h 237168 2012-06-16 18:56:19Z alc $
*/
#ifndef _MACHINE_PMAP_H_
@@ -118,6 +118,7 @@
#define pmap_page_get_memattr(m) ((m)->md.memattr)
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_mapbios(pa, sz) pmap_mapdev(pa, sz)
#define pmap_unmapbios(va, sz) pmap_unmapdev(va, sz)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/ia64/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/ia64/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,41 @@
+/*-
+ * Copyright 2012 Konstantin Belousov <kib at FreeBSD.ORG>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/ia64/include/vdso.h 237433 2012-06-22 07:06:40Z kib $
+ */
+
+#ifndef _IA64_VDSO_H
+#define _IA64_VDSO_H
+
+#define VDSO_TIMEHANDS_MD \
+ uint32_t th_res[8];
+
+#ifdef _KERNEL
+#ifdef COMPAT_FREEBSD32
+
+#define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD
+
+#endif
+#endif
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/capabilities.conf
--- a/head/sys/kern/capabilities.conf Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/capabilities.conf Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
## - sys_exit(2), abort2(2) and close(2) are very important.
## - Sorted alphabetically, please keep it that way.
##
-## $FreeBSD: head/sys/kern/capabilities.conf 224987 2011-08-18 22:51:30Z jonathan $
+## $FreeBSD: head/sys/kern/capabilities.conf 236361 2012-05-31 19:32:37Z pjd $
##
##
@@ -445,13 +445,17 @@
faccessat
fstatat
fchmodat
+fchownat
futimesat
+linkat
mkdirat
-rmdirat
mkfifoat
mknodat
openat
+readlinkat
renameat
+symlinkat
+unlinkat
##
## Allow entry into open(2). This system call will fail, since access to the
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/dtio_kdtrace.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/kern/dtio_kdtrace.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,232 @@
+/*-
+ * Copyright (c) 2012 Advanced Computing Technologies LLC
+ * Written by George Neville-Neil gnn at freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/kern/dtio_kdtrace.c 238366 2012-07-11 16:27:02Z gnn $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+
+#include <sys/dtrace.h>
+#include "../sys/dtrace_bsd.h"
+
+
+static int dtio_unload(void);
+static void dtio_getargdesc(void *, dtrace_id_t, void *,
+ dtrace_argdesc_t *);
+static void dtio_provide(void *, dtrace_probedesc_t *);
+static void dtio_destroy(void *, dtrace_id_t, void *);
+static void dtio_enable(void *, dtrace_id_t, void *);
+static void dtio_disable(void *, dtrace_id_t, void *);
+static void dtio_load(void *);
+
+static dtrace_pattr_t dtio_attr = {
+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
+};
+
+static char *genunix = "genunix";
+
+/*
+ * Name strings.
+ */
+static char *dtio_start_str = "start";
+static char *dtio_done_str = "done";
+static char *dtio_wait_start_str = "wait-start";
+static char *dtio_wait_done_str = "wait-done";
+
+static dtrace_pops_t dtio_pops = {
+ dtio_provide,
+ NULL,
+ dtio_enable,
+ dtio_disable,
+ NULL,
+ NULL,
+ dtio_getargdesc,
+ NULL,
+ NULL,
+ dtio_destroy
+};
+
+static dtrace_provider_id_t dtio_id;
+
+extern uint32_t dtio_start_id;
+extern uint32_t dtio_done_id;
+extern uint32_t dtio_wait_start_id;
+extern uint32_t dtio_wait_done_id;
+
+static void
+dtio_getargdesc(void *arg, dtrace_id_t id, void *parg,
+ dtrace_argdesc_t *desc)
+{
+ const char *p = NULL;
+
+ switch (desc->dtargd_ndx) {
+ case 0:
+ p = "struct bio *";
+ break;
+ case 1:
+ p = "struct devstat *";
+ break;
+ default:
+ desc->dtargd_ndx = DTRACE_ARGNONE;
+ }
+
+ if (p != NULL)
+ strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native));
+}
+
+static void
+dtio_provide(void *arg, dtrace_probedesc_t *desc)
+{
+ if (desc != NULL)
+ return;
+
+ if (dtrace_probe_lookup(dtio_id, genunix, NULL,
+ dtio_start_str) == 0) {
+ dtio_start_id = dtrace_probe_create(dtio_id, genunix, NULL,
+ dtio_start_str, 0, NULL);
+ }
+ if (dtrace_probe_lookup(dtio_id, genunix, NULL, dtio_done_str) == 0) {
+ dtio_done_id = dtrace_probe_create(dtio_id, genunix, NULL,
+ dtio_done_str, 0, NULL);
+ }
+ if (dtrace_probe_lookup(dtio_id, genunix, NULL,
+ dtio_wait_start_str) == 0) {
+ dtio_wait_start_id = dtrace_probe_create(dtio_id, genunix,
+ NULL,
+ dtio_wait_start_str,
+ 0, NULL);
+ }
+ if (dtrace_probe_lookup(dtio_id, genunix, NULL,
+ dtio_wait_done_str) == 0) {
+ dtio_wait_done_id = dtrace_probe_create(dtio_id, genunix, NULL,
+ dtio_wait_done_str, 0, NULL);
+ }
+
+}
+
+static void
+dtio_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+}
+
+static void
+dtio_enable(void *arg, dtrace_id_t id, void *parg)
+{
+ if (id == dtio_start_id)
+ dtrace_io_start_probe =
+ (dtrace_io_start_probe_func_t)dtrace_probe;
+ else if (id == dtio_done_id)
+ dtrace_io_done_probe =
+ (dtrace_io_done_probe_func_t)dtrace_probe;
+ else if (id == dtio_wait_start_id)
+ dtrace_io_wait_start_probe =
+ (dtrace_io_wait_start_probe_func_t)dtrace_probe;
+ else if (id == dtio_wait_done_id)
+ dtrace_io_wait_done_probe =
+ (dtrace_io_wait_done_probe_func_t)dtrace_probe;
+ else
+ printf("dtrace io provider: unknown ID\n");
+
+}
+
+static void
+dtio_disable(void *arg, dtrace_id_t id, void *parg)
+{
+ if (id == dtio_start_id)
+ dtrace_io_start_probe = NULL;
+ else if (id == dtio_done_id)
+ dtrace_io_done_probe = NULL;
+ else if (id == dtio_wait_start_id)
+ dtrace_io_wait_start_probe = NULL;
+ else if (id == dtio_wait_done_id)
+ dtrace_io_wait_done_probe = NULL;
+ else
+ printf("dtrace io provider: unknown ID\n");
+
+}
+
+static void
+dtio_load(void *dummy)
+{
+ if (dtrace_register("io", &dtio_attr, DTRACE_PRIV_USER, NULL,
+ &dtio_pops, NULL, &dtio_id) != 0)
+ return;
+}
+
+
+static int
+dtio_unload()
+{
+ dtrace_io_start_probe = NULL;
+ dtrace_io_done_probe = NULL;
+ dtrace_io_wait_start_probe = NULL;
+ dtrace_io_wait_done_probe = NULL;
+
+ return (dtrace_unregister(dtio_id));
+}
+
+static int
+dtio_modevent(module_t mod __unused, int type, void *data __unused)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ break;
+
+ case MOD_UNLOAD:
+ break;
+
+ case MOD_SHUTDOWN:
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ return (error);
+}
+
+SYSINIT(dtio_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
+ dtio_load, NULL);
+SYSUNINIT(dtio_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
+ dtio_unload, NULL);
+
+DEV_MODULE(dtio, dtio_modevent, NULL);
+MODULE_VERSION(dtio, 1);
+MODULE_DEPEND(dtio, dtrace, 1, 1, 1);
+MODULE_DEPEND(dtio, opensolaris, 1, 1, 1);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/imgact_aout.c
--- a/head/sys/kern/imgact_aout.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/imgact_aout.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/imgact_aout.c 223165 2011-06-16 22:00:59Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/imgact_aout.c 238687 2012-07-22 13:41:45Z kib $");
#include <sys/param.h>
#include <sys/exec.h>
@@ -106,6 +106,7 @@
#define AOUT32_USRSTACK 0xbfc00000
#define AOUT32_PS_STRINGS \
(AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
+#define AOUT32_MINUSER FREEBSD32_MINUSER
extern const char *freebsd32_syscallnames[];
extern u_long ia32_maxssiz;
@@ -129,7 +130,7 @@
.sv_imgact_try = NULL,
.sv_minsigstksz = MINSIGSTKSZ,
.sv_pagesize = IA32_PAGE_SIZE,
- .sv_minuser = 0,
+ .sv_minuser = AOUT32_MINUSER,
.sv_maxuser = AOUT32_USRSTACK,
.sv_usrstack = AOUT32_USRSTACK,
.sv_psstrings = AOUT32_PS_STRINGS,
@@ -174,9 +175,9 @@
* 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI.
* NetBSD is in network byte order.. ugh.
*/
- if (((a_out->a_magic >> 16) & 0xff) != 0x86 &&
- ((a_out->a_magic >> 16) & 0xff) != 0 &&
- ((((int)ntohl(a_out->a_magic)) >> 16) & 0xff) != 0x86)
+ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 &&
+ ((a_out->a_midmag >> 16) & 0xff) != 0 &&
+ ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86)
return -1;
/*
@@ -184,7 +185,7 @@
* We do two cases: host byte order and network byte order
* (for NetBSD compatibility)
*/
- switch ((int)(a_out->a_magic & 0xffff)) {
+ switch ((int)(a_out->a_midmag & 0xffff)) {
case ZMAGIC:
virtual_offset = 0;
if (a_out->a_text) {
@@ -203,7 +204,7 @@
break;
default:
/* NetBSD compatibility */
- switch ((int)(ntohl(a_out->a_magic) & 0xffff)) {
+ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) {
case ZMAGIC:
case QMAGIC:
virtual_offset = PAGE_SIZE;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/imgact_elf.c
--- a/head/sys/kern/imgact_elf.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/imgact_elf.c Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 232828 2012-03-11 19:38:49Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 238617 2012-07-19 11:15:53Z kib $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -83,7 +83,7 @@
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
- const char *interp, int32_t *osrel);
+ const char *interp, int interp_name_len, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
@@ -254,7 +254,7 @@
static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
- int32_t *osrel)
+ int interp_name_len, int32_t *osrel)
{
const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
Elf_Brandinfo *bi;
@@ -300,7 +300,10 @@
if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
continue;
if (hdr->e_machine == bi->machine &&
- strcmp(interp, bi->interp_path) == 0)
+ /* ELF image p_filesz includes terminating zero */
+ strlen(bi->interp_path) + 1 == interp_name_len &&
+ strncmp(interp, bi->interp_path, interp_name_len)
+ == 0)
return (bi);
}
}
@@ -722,7 +725,7 @@
u_long seg_size, seg_addr;
u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
int32_t osrel = 0;
- int error = 0, i, n;
+ int error = 0, i, n, interp_name_len = 0;
const char *interp = NULL, *newinterp = NULL;
Elf_Brandinfo *brand_info;
char *path;
@@ -763,9 +766,11 @@
case PT_INTERP:
/* Path to interpreter */
if (phdr[i].p_filesz > MAXPATHLEN ||
- phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
+ phdr[i].p_offset >= PAGE_SIZE ||
+ phdr[i].p_offset + phdr[i].p_filesz >= PAGE_SIZE)
return (ENOEXEC);
interp = imgp->image_header + phdr[i].p_offset;
+ interp_name_len = phdr[i].p_filesz;
break;
case PT_GNU_STACK:
if (__elfN(nxstack))
@@ -775,7 +780,8 @@
}
}
- brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
+ brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
+ &osrel);
if (brand_info == NULL) {
uprintf("ELF binary type \"%u\" not known.\n",
hdr->e_ident[EI_OSABI]);
@@ -1011,6 +1017,10 @@
AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
}
+ if (imgp->sysent->sv_timekeep_base != 0) {
+ AUXARGS_ENTRY(pos, AT_TIMEKEEP,
+ imgp->sysent->sv_timekeep_base);
+ }
AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
!= NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
imgp->sysent->sv_stackprot);
@@ -1558,6 +1568,7 @@
int i;
if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
+ pnote->p_filesz > PAGE_SIZE ||
pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
return (FALSE);
@@ -1565,15 +1576,17 @@
note_end = (const Elf_Note *)(imgp->image_header +
pnote->p_offset + pnote->p_filesz);
for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
- if (!aligned(note, Elf32_Addr))
+ if (!aligned(note, Elf32_Addr) || (const char *)note_end -
+ (const char *)note < sizeof(Elf_Note))
return (FALSE);
if (note->n_namesz != checknote->hdr.n_namesz ||
note->n_descsz != checknote->hdr.n_descsz ||
note->n_type != checknote->hdr.n_type)
goto nextnote;
note_name = (const char *)(note + 1);
- if (strncmp(checknote->vendor, note_name,
- checknote->hdr.n_namesz) != 0)
+ if (note_name + checknote->hdr.n_namesz >=
+ (const char *)note_end || strncmp(checknote->vendor,
+ note_name, checknote->hdr.n_namesz) != 0)
goto nextnote;
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/imgact_gzip.c
--- a/head/sys/kern/imgact_gzip.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/imgact_gzip.c Wed Jul 25 16:40:53 2012 +0300
@@ -22,7 +22,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/imgact_gzip.c 231885 2012-02-17 23:47:16Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/imgact_gzip.c 237694 2012-06-28 07:33:43Z imp $");
#include <sys/param.h>
#include <sys/exec.h>
@@ -161,7 +161,7 @@
* Set file/virtual offset based on a.out variant. We do two cases:
* host byte order and network byte order (for NetBSD compatibility)
*/
- switch ((int) (gz->a_out.a_magic & 0xffff)) {
+ switch ((int) (gz->a_out.a_midmag & 0xffff)) {
case ZMAGIC:
gz->virtual_offset = 0;
if (gz->a_out.a_text) {
@@ -177,7 +177,7 @@
break;
default:
/* NetBSD compatibility */
- switch ((int) (ntohl(gz->a_out.a_magic) & 0xffff)) {
+ switch ((int) (ntohl(gz->a_out.a_midmag) & 0xffff)) {
case ZMAGIC:
case QMAGIC:
gz->virtual_offset = PAGE_SIZE;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/init_main.c
--- a/head/sys/kern/init_main.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/init_main.c Wed Jul 25 16:40:53 2012 +0300
@@ -42,7 +42,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/init_main.c 230455 2012-01-22 11:01:36Z pjd $");
+__FBSDID("$FreeBSD: head/sys/kern/init_main.c 236404 2012-06-01 15:42:37Z jhb $");
#include "opt_ddb.h"
#include "opt_init_path.h"
@@ -158,6 +158,24 @@
newsysinit_end = newset + count;
}
+#if defined (DDB) && defined(VERBOSE_SYSINIT)
+static const char *
+symbol_name(vm_offset_t va, db_strategy_t strategy)
+{
+ const char *name;
+ c_db_sym_t sym;
+ db_expr_t offset;
+
+ if (va == 0)
+ return (NULL);
+ sym = db_search_symbol(va, strategy, &offset);
+ if (offset != 0)
+ return (NULL);
+ db_symbol_values(sym, &name, NULL);
+ return (name);
+}
+#endif
+
/*
* System startup; initialize the world, create process 0, mount root
* filesystem, and fork to create init and pagedaemon. Most of the
@@ -238,15 +256,16 @@
}
if (verbose) {
#if defined(DDB)
- const char *name;
- c_db_sym_t sym;
- db_expr_t offset;
+ const char *func, *data;
- sym = db_search_symbol((vm_offset_t)(*sipp)->func,
- DB_STGY_PROC, &offset);
- db_symbol_values(sym, &name, NULL);
- if (name != NULL)
- printf(" %s(%p)... ", name, (*sipp)->udata);
+ func = symbol_name((vm_offset_t)(*sipp)->func,
+ DB_STGY_PROC);
+ data = symbol_name((vm_offset_t)(*sipp)->udata,
+ DB_STGY_ANY);
+ if (func != NULL && data != NULL)
+ printf(" %s(&%s)... ", func, data);
+ else if (func != NULL)
+ printf(" %s(%p)... ", func, (*sipp)->udata);
else
#endif
printf(" %p(%p)... ", (*sipp)->func,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/init_sysent.c
--- a/head/sys/kern/init_sysent.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/init_sysent.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call switch table.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/kern/init_sysent.c 227776 2011-11-21 01:26:10Z lstewart $
- * created from FreeBSD: head/sys/kern/syscalls.master 227691 2011-11-19 06:35:15Z ed
+ * $FreeBSD: head/sys/kern/init_sysent.c 236363 2012-05-31 19:34:53Z pjd $
+ * created from FreeBSD: head/sys/kern/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
#include "opt_compat.h"
@@ -525,19 +525,19 @@
{ AS(cpuset_setaffinity_args), (sy_call_t *)sys_cpuset_setaffinity, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 488 = cpuset_setaffinity */
{ AS(faccessat_args), (sy_call_t *)sys_faccessat, AUE_FACCESSAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 489 = faccessat */
{ AS(fchmodat_args), (sy_call_t *)sys_fchmodat, AUE_FCHMODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 490 = fchmodat */
- { AS(fchownat_args), (sy_call_t *)sys_fchownat, AUE_FCHOWNAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 491 = fchownat */
+ { AS(fchownat_args), (sy_call_t *)sys_fchownat, AUE_FCHOWNAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 491 = fchownat */
{ AS(fexecve_args), (sy_call_t *)sys_fexecve, AUE_FEXECVE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 492 = fexecve */
{ AS(fstatat_args), (sy_call_t *)sys_fstatat, AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 493 = fstatat */
{ AS(futimesat_args), (sy_call_t *)sys_futimesat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 494 = futimesat */
- { AS(linkat_args), (sy_call_t *)sys_linkat, AUE_LINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 495 = linkat */
+ { AS(linkat_args), (sy_call_t *)sys_linkat, AUE_LINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 495 = linkat */
{ AS(mkdirat_args), (sy_call_t *)sys_mkdirat, AUE_MKDIRAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 496 = mkdirat */
{ AS(mkfifoat_args), (sy_call_t *)sys_mkfifoat, AUE_MKFIFOAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 497 = mkfifoat */
{ AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 498 = mknodat */
{ AS(openat_args), (sy_call_t *)sys_openat, AUE_OPENAT_RWTC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 499 = openat */
- { AS(readlinkat_args), (sy_call_t *)sys_readlinkat, AUE_READLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 500 = readlinkat */
+ { AS(readlinkat_args), (sy_call_t *)sys_readlinkat, AUE_READLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 500 = readlinkat */
{ AS(renameat_args), (sy_call_t *)sys_renameat, AUE_RENAMEAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 501 = renameat */
- { AS(symlinkat_args), (sy_call_t *)sys_symlinkat, AUE_SYMLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 502 = symlinkat */
- { AS(unlinkat_args), (sy_call_t *)sys_unlinkat, AUE_UNLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 503 = unlinkat */
+ { AS(symlinkat_args), (sy_call_t *)sys_symlinkat, AUE_SYMLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 502 = symlinkat */
+ { AS(unlinkat_args), (sy_call_t *)sys_unlinkat, AUE_UNLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 503 = unlinkat */
{ AS(posix_openpt_args), (sy_call_t *)sys_posix_openpt, AUE_POSIX_OPENPT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 504 = posix_openpt */
{ AS(gssd_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 505 = gssd_syscall */
{ AS(jail_get_args), (sy_call_t *)sys_jail_get, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 506 = jail_get */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_acct.c
--- a/head/sys/kern/kern_acct.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_acct.c Wed Jul 25 16:40:53 2012 +0300
@@ -68,7 +68,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_acct.c 225617 2011-09-16 13:58:51Z kmacy $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_acct.c 234927 2012-05-02 14:25:39Z jhb $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -122,7 +122,7 @@
static uint32_t encode_long(long);
static void acctwatch(void);
static void acct_thread(void *);
-static int acct_disable(struct thread *);
+static int acct_disable(struct thread *, int);
/*
* Accounting vnode pointer, saved vnode pointer, and flags for each.
@@ -196,7 +196,7 @@
sys_acct(struct thread *td, struct acct_args *uap)
{
struct nameidata nd;
- int error, flags, vfslocked;
+ int error, flags, vfslocked, replacing;
error = priv_check(td, PRIV_ACCT);
if (error)
@@ -246,6 +246,13 @@
sx_xlock(&acct_sx);
/*
+ * Don't log spurious disable/enable messages if we are
+ * switching from one accounting file to another due to log
+ * rotation.
+ */
+ replacing = (acct_vp != NULL && uap->path != NULL);
+
+ /*
* If accounting was previously enabled, kill the old space-watcher,
* close the file, and (if no new file was specified, leave). Reset
* the suspended state regardless of whether accounting remains
@@ -254,7 +261,7 @@
acct_suspended = 0;
if (acct_vp != NULL) {
vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
- error = acct_disable(td);
+ error = acct_disable(td, !replacing);
VFS_UNLOCK_GIANT(vfslocked);
}
if (uap->path == NULL) {
@@ -299,7 +306,8 @@
}
acct_configured = 1;
sx_xunlock(&acct_sx);
- log(LOG_NOTICE, "Accounting enabled\n");
+ if (!replacing)
+ log(LOG_NOTICE, "Accounting enabled\n");
return (error);
}
@@ -308,7 +316,7 @@
* our reference to the credential, and clearing the vnode's flags.
*/
static int
-acct_disable(struct thread *td)
+acct_disable(struct thread *td, int logging)
{
int error;
@@ -319,7 +327,8 @@
acct_vp = NULL;
acct_cred = NULL;
acct_flags = 0;
- log(LOG_NOTICE, "Accounting disabled\n");
+ if (logging)
+ log(LOG_NOTICE, "Accounting disabled\n");
return (error);
}
@@ -574,7 +583,7 @@
*/
vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
if (acct_vp->v_type == VBAD) {
- (void) acct_disable(NULL);
+ (void) acct_disable(NULL, 1);
VFS_UNLOCK_GIANT(vfslocked);
acct_state |= ACCT_EXITREQ;
return;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_clock.c
--- a/head/sys/kern/kern_clock.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_clock.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,11 +35,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_clock.c 233628 2012-03-28 20:58:30Z fabient $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_clock.c 235459 2012-05-15 01:30:25Z rstone $");
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
+#include "opt_kdtrace.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"
@@ -56,6 +57,7 @@
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
@@ -88,6 +90,9 @@
/* Spin-lock protecting profiling statistics. */
static struct mtx time_lock;
+SDT_PROVIDER_DECLARE(sched);
+SDT_PROBE_DEFINE2(sched, , , tick, tick, "struct thread *", "struct proc *");
+
static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
@@ -760,6 +765,7 @@
ru->ru_maxrss = rss;
KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
"prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
+ SDT_PROBE2(sched, , , tick, td, td->td_proc);
thread_lock_flags(td, MTX_QUIET);
for ( ; cnt > 0; cnt--)
sched_clock(td);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_conf.c
--- a/head/sys/kern/kern_conf.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_conf.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_conf.c 231386 2012-02-10 14:55:47Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_conf.c 235899 2012-05-24 11:24:44Z mav $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -993,9 +993,10 @@
max_parentpath_len = SPECNAMELEN - physpath_len - /*/*/1;
parentpath_len = strlen(pdev->si_name);
if (max_parentpath_len < parentpath_len) {
- printf("make_dev_physpath_alias: WARNING - Unable to alias %s "
- "to %s/%s - path too long\n",
- pdev->si_name, physpath, pdev->si_name);
+ if (bootverbose)
+ printf("WARNING: Unable to alias %s "
+ "to %s/%s - path too long\n",
+ pdev->si_name, physpath, pdev->si_name);
ret = ENAMETOOLONG;
goto out;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_descrip.c
--- a/head/sys/kern/kern_descrip.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_descrip.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_descrip.c 234131 2012-04-11 14:08:09Z eadler $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_descrip.c 238667 2012-07-21 13:02:11Z kib $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -102,7 +102,7 @@
static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
- "file desc to leader structures");
+ "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
MALLOC_DECLARE(M_FADVISE);
@@ -113,21 +113,24 @@
/* Flags for do_dup() */
#define DUP_FIXED 0x1 /* Force fixed allocation */
#define DUP_FCNTL 0x2 /* fcntl()-style errors */
-
-static int do_dup(struct thread *td, int flags, int old, int new,
- register_t *retval);
-static int fd_first_free(struct filedesc *, int, int);
-static int fd_last_used(struct filedesc *, int, int);
-static void fdgrowtable(struct filedesc *, int);
+#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */
+
+static int closefp(struct filedesc *fdp, int fd, struct file *fp,
+ struct thread *td, int holdleaders);
+static int do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval);
+static int fd_first_free(struct filedesc *fdp, int low, int size);
+static int fd_last_used(struct filedesc *fdp, int size);
+static void fdgrowtable(struct filedesc *fdp, int nfd);
static void fdunused(struct filedesc *fdp, int fd);
static void fdused(struct filedesc *fdp, int fd);
-static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif);
-static int fill_socket_info(struct socket *so, struct kinfo_file *kif);
-static int fill_pts_info(struct tty *tp, struct kinfo_file *kif);
static int fill_pipe_info(struct pipe *pi, struct kinfo_file *kif);
static int fill_procdesc_info(struct procdesc *pdp,
- struct kinfo_file *kif);
+ struct kinfo_file *kif);
+static int fill_pts_info(struct tty *tp, struct kinfo_file *kif);
static int fill_shm_info(struct file *fp, struct kinfo_file *kif);
+static int fill_socket_info(struct socket *so, struct kinfo_file *kif);
+static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif);
/*
* A process is initially started out with NDFILE descriptors stored within
@@ -181,14 +184,15 @@
*/
volatile int openfiles; /* actual number of open files */
struct mtx sigio_lock; /* mtx to protect pointers to sigio */
-void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
/* A mutex to protect the association between a proc and filedesc. */
-static struct mtx fdesc_mtx;
+static struct mtx fdesc_mtx;
/*
- * Find the first zero bit in the given bitmap, starting at low and not
- * exceeding size - 1.
+ * If low >= size, just return low. Otherwise find the first zero bit in the
+ * given bitmap, starting at low and not exceeding size - 1. Return size if
+ * not found.
*/
static int
fd_first_free(struct filedesc *fdp, int low, int size)
@@ -214,19 +218,16 @@
}
/*
- * Find the highest non-zero bit in the given bitmap, starting at low and
- * not exceeding size - 1.
+ * Find the highest non-zero bit in the given bitmap, starting at 0 and
+ * not exceeding size - 1. Return -1 if not found.
*/
static int
-fd_last_used(struct filedesc *fdp, int low, int size)
+fd_last_used(struct filedesc *fdp, int size)
{
NDSLOTTYPE *map = fdp->fd_map;
NDSLOTTYPE mask;
int off, minoff;
- if (low >= size)
- return (-1);
-
off = NDSLOT(size);
if (size % NDENTRIES) {
mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
@@ -234,17 +235,21 @@
return (off * NDENTRIES + flsl(mask) - 1);
--off;
}
- for (minoff = NDSLOT(low); off >= minoff; --off)
+ for (minoff = NDSLOT(0); off >= minoff; --off)
if (map[off] != 0)
return (off * NDENTRIES + flsl(map[off]) - 1);
- return (low - 1);
+ return (-1);
}
static int
fdisused(struct filedesc *fdp, int fd)
{
- KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
- ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+
+ FILEDESC_LOCK_ASSERT(fdp);
+
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+
return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
}
@@ -256,8 +261,8 @@
{
FILEDESC_XLOCK_ASSERT(fdp);
- KASSERT(!fdisused(fdp, fd),
- ("fd already used"));
+
+ KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));
fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
if (fd > fdp->fd_lastfile)
@@ -274,16 +279,15 @@
{
FILEDESC_XLOCK_ASSERT(fdp);
- KASSERT(fdisused(fdp, fd),
- ("fd is already unused"));
- KASSERT(fdp->fd_ofiles[fd] == NULL,
- ("fd is still in use"));
+
+ KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
+ KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd));
fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
if (fd < fdp->fd_freefile)
fdp->fd_freefile = fd;
if (fd == fdp->fd_lastfile)
- fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
+ fdp->fd_lastfile = fd_last_used(fdp, fd);
}
/*
@@ -363,7 +367,7 @@
sys_fcntl(struct thread *td, struct fcntl_args *uap)
{
struct flock fl;
- struct oflock ofl;
+ struct __oflock ofl;
intptr_t arg;
int error;
int cmd;
@@ -427,23 +431,13 @@
return (error);
}
-static inline struct file *
-fdtofp(int fd, struct filedesc *fdp)
-{
- struct file *fp;
-
- FILEDESC_LOCK_ASSERT(fdp);
- if ((unsigned)fd >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[fd]) == NULL)
- return (NULL);
- return (fp);
-}
-
static inline int
fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp)
{
- *fpp = fdtofp(fd, fdp);
+ FILEDESC_LOCK_ASSERT(fdp);
+
+ *fpp = fget_locked(fdp, fd);
if (*fpp == NULL)
return (EBADF);
@@ -472,6 +466,7 @@
int vfslocked;
u_int old, new;
uint64_t bsize;
+ off_t foffset;
vfslocked = 0;
error = 0;
@@ -485,6 +480,12 @@
error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
break;
+ case F_DUPFD_CLOEXEC:
+ tmp = arg;
+ error = do_dup(td, DUP_FCNTL | DUP_CLOEXEC, fd, tmp,
+ td->td_retval);
+ break;
+
case F_DUP2FD:
tmp = arg;
error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
@@ -492,7 +493,7 @@
case F_GETFD:
FILEDESC_SLOCK(fdp);
- if ((fp = fdtofp(fd, fdp)) == NULL) {
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
break;
@@ -504,7 +505,7 @@
case F_SETFD:
FILEDESC_XLOCK(fdp);
- if ((fp = fdtofp(fd, fdp)) == NULL) {
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
FILEDESC_XUNLOCK(fdp);
error = EBADF;
break;
@@ -613,14 +614,15 @@
}
flp = (struct flock *)arg;
if (flp->l_whence == SEEK_CUR) {
- if (fp->f_offset < 0 ||
+ foffset = foffset_get(fp);
+ if (foffset < 0 ||
(flp->l_start > 0 &&
- fp->f_offset > OFF_MAX - flp->l_start)) {
+ foffset > OFF_MAX - flp->l_start)) {
FILEDESC_SUNLOCK(fdp);
error = EOVERFLOW;
break;
}
- flp->l_start += fp->f_offset;
+ flp->l_start += foffset;
}
/*
@@ -675,10 +677,30 @@
}
VFS_UNLOCK_GIANT(vfslocked);
vfslocked = 0;
- /* Check for race with close */
+ if (error != 0 || flp->l_type == F_UNLCK ||
+ flp->l_type == F_UNLCKSYS) {
+ fdrop(fp, td);
+ break;
+ }
+
+ /*
+ * Check for a race with close.
+ *
+ * The vnode is now advisory locked (or unlocked, but this case
+ * is not really important) as the caller requested.
+ * We had to drop the filedesc lock, so we need to recheck if
+ * the descriptor is still valid, because if it was closed
+ * in the meantime we need to remove advisory lock from the
+ * vnode - close on any descriptor leading to an advisory
+ * locked vnode, removes that lock.
+ * We will return 0 on purpose in that case, as the result of
+ * successful advisory lock might have been externally visible
+ * already. This is fine - effectively we pretend to the caller
+ * that the closing thread was a bit slower and that the
+ * advisory lock succeeded before the close.
+ */
FILEDESC_SLOCK(fdp);
- if ((unsigned) fd >= fdp->fd_nfiles ||
- fp != fdp->fd_ofiles[fd]) {
+ if (fget_locked(fdp, fd) != fp) {
FILEDESC_SUNLOCK(fdp);
flp->l_whence = SEEK_SET;
flp->l_start = 0;
@@ -686,7 +708,7 @@
flp->l_type = F_UNLCK;
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
- F_UNLCK, flp, F_POSIX);
+ F_UNLCK, flp, F_POSIX);
VFS_UNLOCK_GIANT(vfslocked);
vfslocked = 0;
} else
@@ -714,15 +736,16 @@
break;
}
if (flp->l_whence == SEEK_CUR) {
+ foffset = foffset_get(fp);
if ((flp->l_start > 0 &&
- fp->f_offset > OFF_MAX - flp->l_start) ||
+ foffset > OFF_MAX - flp->l_start) ||
(flp->l_start < 0 &&
- fp->f_offset < OFF_MIN - flp->l_start)) {
+ foffset < OFF_MIN - flp->l_start)) {
FILEDESC_SUNLOCK(fdp);
error = EOVERFLOW;
break;
}
- flp->l_start += fp->f_offset;
+ flp->l_start += foffset;
}
/*
* VOP_ADVLOCK() may block.
@@ -743,7 +766,7 @@
/* FALLTHROUGH */
case F_READAHEAD:
FILEDESC_SLOCK(fdp);
- if ((fp = fdtofp(fd, fdp)) == NULL) {
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
break;
@@ -799,7 +822,7 @@
struct proc *p;
struct file *fp;
struct file *delfp;
- int error, holdleaders, maxfd;
+ int error, maxfd;
p = td->td_proc;
fdp = p->p_fd;
@@ -820,7 +843,7 @@
return (flags & DUP_FCNTL ? EINVAL : EBADF);
FILEDESC_XLOCK(fdp);
- if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
+ if (fget_locked(fdp, old) == NULL) {
FILEDESC_XUNLOCK(fdp);
return (EBADF);
}
@@ -871,77 +894,29 @@
}
}
+ KASSERT(fp == fdp->fd_ofiles[old], ("old fd has been modified"));
+ KASSERT(old != new, ("new fd is same as old"));
+
+ delfp = fdp->fd_ofiles[new];
/*
- * If the old file changed out from under us then treat it as a
- * bad file descriptor. Userland should do its own locking to
- * avoid this case.
- */
- if (fdp->fd_ofiles[old] != fp) {
- /* we've allocated a descriptor which we won't use */
- if (fdp->fd_ofiles[new] == NULL)
- fdunused(fdp, new);
- FILEDESC_XUNLOCK(fdp);
- fdrop(fp, td);
- return (EBADF);
- }
- KASSERT(old != new,
- ("new fd is same as old"));
-
- /*
- * Save info on the descriptor being overwritten. We cannot close
- * it without introducing an ownership race for the slot, since we
- * need to drop the filedesc lock to call closef().
- *
- * XXX this duplicates parts of close().
- */
- delfp = fdp->fd_ofiles[new];
- holdleaders = 0;
- if (delfp != NULL) {
- if (td->td_proc->p_fdtol != NULL) {
- /*
- * Ask fdfree() to sleep to ensure that all relevant
- * process leaders can be traversed in closef().
- */
- fdp->fd_holdleaderscount++;
- holdleaders = 1;
- }
- }
-
- /*
- * Duplicate the source descriptor
+ * Duplicate the source descriptor.
*/
fdp->fd_ofiles[new] = fp;
- fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+ if ((flags & DUP_CLOEXEC) != 0)
+ fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE;
+ else
+ fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
if (new > fdp->fd_lastfile)
fdp->fd_lastfile = new;
*retval = new;
- /*
- * If we dup'd over a valid file, we now own the reference to it
- * and must dispose of it using closef() semantics (as if a
- * close() were performed on it).
- *
- * XXX this duplicates parts of close().
- */
if (delfp != NULL) {
- knote_fdclose(td, new);
- if (delfp->f_type == DTYPE_MQUEUE)
- mq_fdclose(td, new, delfp);
- FILEDESC_XUNLOCK(fdp);
- (void) closef(delfp, td);
- if (holdleaders) {
- FILEDESC_XLOCK(fdp);
- fdp->fd_holdleaderscount--;
- if (fdp->fd_holdleaderscount == 0 &&
- fdp->fd_holdleaderswakeup != 0) {
- fdp->fd_holdleaderswakeup = 0;
- wakeup(&fdp->fd_holdleaderscount);
- }
- FILEDESC_XUNLOCK(fdp);
- }
+ (void) closefp(fdp, new, delfp, td, 1);
+ /* closefp() drops the FILEDESC lock for us. */
} else {
FILEDESC_XUNLOCK(fdp);
}
+
return (0);
}
@@ -1165,6 +1140,61 @@
}
/*
+ * Function drops the filedesc lock on return.
+ */
+static int
+closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
+ int holdleaders)
+{
+ struct file *fp_object;
+ int error;
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+
+ if (holdleaders) {
+ if (td->td_proc->p_fdtol != NULL) {
+ /*
+ * Ask fdfree() to sleep to ensure that all relevant
+ * process leaders can be traversed in closef().
+ */
+ fdp->fd_holdleaderscount++;
+ } else {
+ holdleaders = 0;
+ }
+ }
+
+ /*
+ * We now hold the fp reference that used to be owned by the
+ * descriptor array. We have to unlock the FILEDESC *AFTER*
+ * knote_fdclose to prevent a race of the fd getting opened, a knote
+ * added, and deleteing a knote for the new fd.
+ */
+ knote_fdclose(td, fd);
+
+ /*
+ * When we're closing an fd with a capability, we need to notify
+ * mqueue if the underlying object is of type mqueue.
+ */
+ (void)cap_funwrap(fp, 0, &fp_object);
+ if (fp_object->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, fd, fp_object);
+ FILEDESC_XUNLOCK(fdp);
+
+ error = closef(fp, td);
+ if (holdleaders) {
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_holdleaderscount--;
+ if (fdp->fd_holdleaderscount == 0 &&
+ fdp->fd_holdleaderswakeup != 0) {
+ fdp->fd_holdleaderswakeup = 0;
+ wakeup(&fdp->fd_holdleaderscount);
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ return (error);
+}
+
+/*
* Close a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
@@ -1188,63 +1218,23 @@
int fd;
{
struct filedesc *fdp;
- struct file *fp, *fp_object;
- int error;
- int holdleaders;
-
- error = 0;
- holdleaders = 0;
+ struct file *fp;
+
fdp = td->td_proc->p_fd;
AUDIT_SYSCLOSE(td, fd);
FILEDESC_XLOCK(fdp);
- if ((unsigned)fd >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[fd]) == NULL) {
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
FILEDESC_XUNLOCK(fdp);
return (EBADF);
}
fdp->fd_ofiles[fd] = NULL;
fdp->fd_ofileflags[fd] = 0;
fdunused(fdp, fd);
- if (td->td_proc->p_fdtol != NULL) {
- /*
- * Ask fdfree() to sleep to ensure that all relevant
- * process leaders can be traversed in closef().
- */
- fdp->fd_holdleaderscount++;
- holdleaders = 1;
- }
-
- /*
- * We now hold the fp reference that used to be owned by the
- * descriptor array. We have to unlock the FILEDESC *AFTER*
- * knote_fdclose to prevent a race of the fd getting opened, a knote
- * added, and deleteing a knote for the new fd.
- */
- knote_fdclose(td, fd);
-
- /*
- * When we're closing an fd with a capability, we need to notify
- * mqueue if the underlying object is of type mqueue.
- */
- (void)cap_funwrap(fp, 0, &fp_object);
- if (fp_object->f_type == DTYPE_MQUEUE)
- mq_fdclose(td, fd, fp_object);
- FILEDESC_XUNLOCK(fdp);
-
- error = closef(fp, td);
- if (holdleaders) {
- FILEDESC_XLOCK(fdp);
- fdp->fd_holdleaderscount--;
- if (fdp->fd_holdleaderscount == 0 &&
- fdp->fd_holdleaderswakeup != 0) {
- fdp->fd_holdleaderswakeup = 0;
- wakeup(&fdp->fd_holdleaderscount);
- }
- FILEDESC_XUNLOCK(fdp);
- }
- return (error);
+
+ /* closefp() drops the FILEDESC lock for us. */
+ return (closefp(fdp, fd, fp, td, 1));
}
/*
@@ -1407,6 +1397,7 @@
vp = fp->f_vnode;
if (vp != NULL) {
int vfslocked;
+
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
error = VOP_PATHCONF(vp, uap->name, td->td_retval);
@@ -1417,7 +1408,7 @@
error = EINVAL;
} else {
td->td_retval[0] = PIPE_BUF;
- error = 0;
+ error = 0;
}
} else {
error = EOPNOTSUPP;
@@ -1428,9 +1419,7 @@
}
/*
- * Grow the file table to accomodate (at least) nfd descriptors. This may
- * block and drop the filedesc lock, but it will reacquire it before
- * returning.
+ * Grow the file table to accomodate (at least) nfd descriptors.
*/
static void
fdgrowtable(struct filedesc *fdp, int nfd)
@@ -1456,7 +1445,6 @@
return;
/* allocate a new table and (if required) new bitmaps */
- FILEDESC_XUNLOCK(fdp);
ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
M_FILEDESC, M_ZERO | M_WAITOK);
nfileflags = (char *)&ntable[nnfiles];
@@ -1465,20 +1453,7 @@
M_FILEDESC, M_ZERO | M_WAITOK);
else
nmap = NULL;
- FILEDESC_XLOCK(fdp);
-
- /*
- * We now have new tables ready to go. Since we dropped the
- * filedesc lock to call malloc(), watch out for a race.
- */
- onfiles = fdp->fd_nfiles;
- if (onfiles >= nnfiles) {
- /* we lost the race, but that's OK */
- free(ntable, M_FILEDESC);
- if (nmap != NULL)
- free(nmap, M_FILEDESC);
- return;
- }
+
bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
otable = fdp->fd_ofiles;
@@ -1512,7 +1487,7 @@
{
struct proc *p = td->td_proc;
struct filedesc *fdp = p->p_fd;
- int fd = -1, maxfd;
+ int fd = -1, maxfd, allocfd;
#ifdef RACCT
int error;
#endif
@@ -1527,36 +1502,38 @@
PROC_UNLOCK(p);
/*
- * Search the bitmap for a free descriptor. If none is found, try
- * to grow the file table. Keep at it until we either get a file
- * descriptor or run into process or system limits; fdgrowtable()
- * may drop the filedesc lock, so we're in a race.
+ * Search the bitmap for a free descriptor starting at minfd.
+ * If none is found, grow the file table.
*/
- for (;;) {
- fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
- if (fd >= maxfd)
- return (EMFILE);
- if (fd < fdp->fd_nfiles)
- break;
+ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
+ if (fd >= maxfd)
+ return (EMFILE);
+ if (fd >= fdp->fd_nfiles) {
+ allocfd = min(fd * 2, maxfd);
#ifdef RACCT
PROC_LOCK(p);
- error = racct_set(p, RACCT_NOFILE, min(fdp->fd_nfiles * 2, maxfd));
+ error = racct_set(p, RACCT_NOFILE, allocfd);
PROC_UNLOCK(p);
if (error != 0)
return (EMFILE);
#endif
- fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
+ /*
+ * fd is already equal to first free descriptor >= minfd, so
+ * we only need to grow the table and we are done.
+ */
+ fdgrowtable(fdp, allocfd);
}
/*
* Perform some sanity checks, then mark the file descriptor as
* used and return it to the caller.
*/
+ KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles),
+ ("invalid descriptor %d", fd));
KASSERT(!fdisused(fdp, fd),
("fd_first_free() returned non-free descriptor"));
- KASSERT(fdp->fd_ofiles[fd] == NULL,
- ("free descriptor isn't"));
- fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
+ KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free"));
+ KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set"));
fdused(fdp, fd);
*result = fd;
return (0);
@@ -1571,7 +1548,6 @@
{
struct proc *p = td->td_proc;
struct filedesc *fdp = td->td_proc->p_fd;
- struct file **fpp;
int i, lim, last;
FILEDESC_LOCK_ASSERT(fdp);
@@ -1587,9 +1563,8 @@
if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
return (1);
last = min(fdp->fd_nfiles, lim);
- fpp = &fdp->fd_ofiles[fdp->fd_freefile];
- for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
- if (*fpp == NULL && --n <= 0)
+ for (i = fdp->fd_freefile; i < last; i++) {
+ if (fdp->fd_ofiles[i] == NULL && --n <= 0)
return (1);
}
return (0);
@@ -1848,7 +1823,6 @@
fdfree(struct thread *td)
{
struct filedesc *fdp;
- struct file **fpp;
int i, locked;
struct filedesc_to_leader *fdtol;
struct file *fp;
@@ -1875,13 +1849,10 @@
fdtol->fdl_refcount));
if (fdtol->fdl_refcount == 1 &&
(td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
- for (i = 0, fpp = fdp->fd_ofiles;
- i <= fdp->fd_lastfile;
- i++, fpp++) {
- if (*fpp == NULL ||
- (*fpp)->f_type != DTYPE_VNODE)
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ fp = fdp->fd_ofiles[i];
+ if (fp == NULL || fp->f_type != DTYPE_VNODE)
continue;
- fp = *fpp;
fhold(fp);
FILEDESC_XUNLOCK(fdp);
lf.l_whence = SEEK_SET;
@@ -1891,15 +1862,11 @@
vp = fp->f_vnode;
locked = VFS_LOCK_GIANT(vp->v_mount);
(void) VOP_ADVLOCK(vp,
- (caddr_t)td->td_proc->
- p_leader,
- F_UNLCK,
- &lf,
- F_POSIX);
+ (caddr_t)td->td_proc->p_leader, F_UNLCK,
+ &lf, F_POSIX);
VFS_UNLOCK_GIANT(locked);
FILEDESC_XLOCK(fdp);
fdrop(fp, td);
- fpp = fdp->fd_ofiles + i;
}
}
retry:
@@ -1944,12 +1911,11 @@
if (i > 0)
return;
- fpp = fdp->fd_ofiles;
- for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
- if (*fpp) {
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ fp = fdp->fd_ofiles[i];
+ if (fp != NULL) {
FILEDESC_XLOCK(fdp);
- fp = *fpp;
- *fpp = NULL;
+ fdp->fd_ofiles[i] = NULL;
FILEDESC_XUNLOCK(fdp);
(void) closef(fp, td);
}
@@ -2086,6 +2052,7 @@
fdcloseexec(struct thread *td)
{
struct filedesc *fdp;
+ struct file *fp;
int i;
/* Certain daemons might not have file descriptors. */
@@ -2093,31 +2060,20 @@
if (fdp == NULL)
return;
- FILEDESC_XLOCK(fdp);
-
/*
* We cannot cache fd_ofiles or fd_ofileflags since operations
* may block and rip them out from under us.
*/
+ FILEDESC_XLOCK(fdp);
for (i = 0; i <= fdp->fd_lastfile; i++) {
- if (fdp->fd_ofiles[i] != NULL &&
- (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
+ fp = fdp->fd_ofiles[i];
+ if (fp != NULL && (fp->f_type == DTYPE_MQUEUE ||
(fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
- struct file *fp;
-
- knote_fdclose(td, i);
- /*
- * NULL-out descriptor prior to close to avoid
- * a race while close blocks.
- */
- fp = fdp->fd_ofiles[i];
fdp->fd_ofiles[i] = NULL;
fdp->fd_ofileflags[i] = 0;
fdunused(fdp, i);
- if (fp->f_type == DTYPE_MQUEUE)
- mq_fdclose(td, i, fp);
- FILEDESC_XUNLOCK(fdp);
- (void) closef(fp, td);
+ (void) closefp(fdp, i, fp, td, 0);
+ /* closefp() drops the FILEDESC lock. */
FILEDESC_XLOCK(fdp);
}
}
@@ -2198,7 +2154,7 @@
* node, not the capability itself.
*/
(void)cap_funwrap(fp, 0, &fp_object);
- if ((fp_object->f_type == DTYPE_VNODE) && (td != NULL)) {
+ if (fp_object->f_type == DTYPE_VNODE && td != NULL) {
int vfslocked;
vp = fp_object->f_vnode;
@@ -2209,7 +2165,7 @@
lf.l_len = 0;
lf.l_type = F_UNLCK;
(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
- F_UNLCK, &lf, F_POSIX);
+ F_UNLCK, &lf, F_POSIX);
}
fdtol = td->td_proc->p_fdtol;
if (fdtol != NULL) {
@@ -2233,8 +2189,8 @@
lf.l_type = F_UNLCK;
vp = fp_object->f_vnode;
(void) VOP_ADVLOCK(vp,
- (caddr_t)fdtol->fdl_leader,
- F_UNLCK, &lf, F_POSIX);
+ (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
+ F_POSIX);
FILEDESC_XLOCK(fdp);
fdtol->fdl_holdcount--;
if (fdtol->fdl_holdcount == 0 &&
@@ -2329,8 +2285,8 @@
struct file *fp;
#ifdef CAPABILITIES
struct file *fp_fromcap;
+#endif
int error;
-#endif
*fpp = NULL;
if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
@@ -2369,7 +2325,7 @@
else
error = cap_funwrap_mmap(fp, needrights, maxprotp,
&fp_fromcap);
- if (error) {
+ if (error != 0) {
fdrop(fp, td);
return (error);
}
@@ -2394,14 +2350,30 @@
/*
* FREAD and FWRITE failure return EBADF as per POSIX.
- *
- * Only one flag, or 0, may be specified.
*/
- if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
- (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+ error = 0;
+ switch (flags) {
+ case FREAD:
+ case FWRITE:
+ if ((fp->f_flag & flags) == 0)
+ error = EBADF;
+ break;
+ case FEXEC:
+ if ((fp->f_flag & (FREAD | FEXEC)) == 0 ||
+ ((fp->f_flag & FWRITE) != 0))
+ error = EBADF;
+ break;
+ case 0:
+ break;
+ default:
+ KASSERT(0, ("wrong flags"));
+ }
+
+ if (error != 0) {
fdrop(fp, td);
- return (EBADF);
+ return (error);
}
+
*fpp = fp;
return (0);
}
@@ -2498,6 +2470,13 @@
return (_fgetvp(td, fd, FREAD, rights, NULL, vpp));
}
+int
+fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp));
+}
+
#ifdef notyet
int
fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
@@ -2647,10 +2626,13 @@
* Duplicate the specified descriptor to a free descriptor.
*/
int
-dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
+dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp)
{
- struct file *wfp;
struct file *fp;
+ int error, indx;
+
+ KASSERT(openerror == ENODEV || openerror == ENXIO,
+ ("unexpected error %d in %s", openerror, __func__));
/*
* If the to-be-dup'd fd number is greater than the allowed number
@@ -2658,12 +2640,17 @@
* closed, then reject.
*/
FILEDESC_XLOCK(fdp);
- if (dfd < 0 || dfd >= fdp->fd_nfiles ||
- (wfp = fdp->fd_ofiles[dfd]) == NULL) {
+ if ((fp = fget_locked(fdp, dfd)) == NULL) {
FILEDESC_XUNLOCK(fdp);
return (EBADF);
}
+ error = fdalloc(td, 0, &indx);
+ if (error != 0) {
+ FILEDESC_XUNLOCK(fdp);
+ return (error);
+ }
+
/*
* There are two cases of interest here.
*
@@ -2671,61 +2658,36 @@
*
* For ENXIO steal away the file structure from (dfd) and store it in
* (indx). (dfd) is effectively closed by this operation.
- *
- * Any other error code is just returned.
*/
- switch (error) {
+ switch (openerror) {
case ENODEV:
/*
* Check that the mode the file is being opened for is a
* subset of the mode of the existing descriptor.
*/
- if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+ if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+ fdunused(fdp, indx);
FILEDESC_XUNLOCK(fdp);
return (EACCES);
}
- fp = fdp->fd_ofiles[indx];
- fdp->fd_ofiles[indx] = wfp;
+ fdp->fd_ofiles[indx] = fp;
fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
- if (fp == NULL)
- fdused(fdp, indx);
- fhold(wfp);
- FILEDESC_XUNLOCK(fdp);
- if (fp != NULL)
- /*
- * We now own the reference to fp that the ofiles[]
- * array used to own. Release it.
- */
- fdrop(fp, td);
- return (0);
-
+ fhold(fp);
+ break;
case ENXIO:
/*
* Steal away the file pointer from dfd and stuff it into indx.
*/
- fp = fdp->fd_ofiles[indx];
- fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ fdp->fd_ofiles[indx] = fp;
fdp->fd_ofiles[dfd] = NULL;
fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
fdp->fd_ofileflags[dfd] = 0;
fdunused(fdp, dfd);
- if (fp == NULL)
- fdused(fdp, indx);
- FILEDESC_XUNLOCK(fdp);
-
- /*
- * We now own the reference to fp that the ofiles[] array
- * used to own. Release it.
- */
- if (fp != NULL)
- fdrop(fp, td);
- return (0);
-
- default:
- FILEDESC_XUNLOCK(fdp);
- return (error);
+ break;
}
- /* NOTREACHED */
+ FILEDESC_XUNLOCK(fdp);
+ *indxp = indx;
+ return (0);
}
/*
@@ -2884,7 +2846,7 @@
xf.xf_type = fp->f_type;
xf.xf_count = fp->f_count;
xf.xf_msgcount = 0;
- xf.xf_offset = fp->f_offset;
+ xf.xf_offset = foffset_get(fp);
xf.xf_flag = fp->f_flag;
error = SYSCTL_OUT(req, &xf, sizeof(xf));
if (error)
@@ -3089,7 +3051,7 @@
kif->kf_flags |= KF_FLAG_DIRECT;
if (fp->f_flag & FHASLOCK)
kif->kf_flags |= KF_FLAG_HASLOCK;
- kif->kf_offset = fp->f_offset;
+ kif->kf_offset = foffset_get(fp);
if (vp != NULL) {
vref(vp);
switch (vp->v_type) {
@@ -3433,7 +3395,7 @@
}
refcnt = fp->f_count;
fflags = fp->f_flag;
- offset = fp->f_offset;
+ offset = foffset_get(fp);
/*
* Create sysctl entry.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_event.c
--- a/head/sys/kern/kern_event.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_event.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 233505 2012-03-26 09:34:17Z melifaro $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 238424 2012-07-13 13:24:33Z jhb $");
#include "opt_ktrace.h"
@@ -513,6 +513,10 @@
list->kl_unlock(list->kl_lockarg);
}
+/*
+ * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
+ * interval timer support code.
+ */
static int
timertoticks(intptr_t data)
{
@@ -526,7 +530,6 @@
return tticks;
}
-/* XXX - move to kern_timeout.c? */
static void
filt_timerexpire(void *knx)
{
@@ -536,9 +539,16 @@
kn->kn_data++;
KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
+ /*
+ * timertoticks() uses tvtohz() which always adds 1 to allow
+ * for the time until the next clock interrupt being strictly
+ * less than 1 clock tick. We don't want that here since we
+ * want to appear to be in sync with the clock interrupt even
+ * when we're delayed.
+ */
if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
calloutp = (struct callout *)kn->kn_hook;
- callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
+ callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1,
filt_timerexpire, kn);
}
}
@@ -546,7 +556,6 @@
/*
* data contains amount of time to sleep, in milliseconds
*/
-/* XXX - move to kern_timeout.c? */
static int
filt_timerattach(struct knote *kn)
{
@@ -570,7 +579,6 @@
return (0);
}
-/* XXX - move to kern_timeout.c? */
static void
filt_timerdetach(struct knote *kn)
{
@@ -583,7 +591,6 @@
kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */
}
-/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{
@@ -692,7 +699,7 @@
if (error)
goto done2;
- /* An extra reference on `nfp' has been held for us by falloc(). */
+ /* An extra reference on `fp' has been held for us by falloc(). */
kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
TAILQ_INIT(&kq->kq_head);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_exec.c
--- a/head/sys/kern/kern_exec.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_exec.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_exec.c 232700 2012-03-08 19:41:05Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_exec.c 238220 2012-07-08 00:51:38Z mjg $");
#include "opt_capsicum.h"
#include "opt_hwpmc_hooks.h"
@@ -443,8 +443,10 @@
/*
* Some might argue that CAP_READ and/or CAP_MMAP should also
* be required here; such arguments will be entertained.
+ *
+ * Descriptors opened only with O_EXEC or O_RDONLY are allowed.
*/
- error = fgetvp_read(td, args->fd, CAP_FEXECVE, &binvp);
+ error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp);
if (error)
goto exec_fail;
vfslocked = VFS_LOCK_GIANT(binvp->v_mount);
@@ -1511,64 +1513,3 @@
execsw = newexecsw;
return (0);
}
-
-static vm_object_t shared_page_obj;
-static int shared_page_free;
-
-int
-shared_page_fill(int size, int align, const char *data)
-{
- vm_page_t m;
- struct sf_buf *s;
- vm_offset_t sk;
- int res;
-
- VM_OBJECT_LOCK(shared_page_obj);
- m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY);
- res = roundup(shared_page_free, align);
- if (res + size >= IDX_TO_OFF(shared_page_obj->size))
- res = -1;
- else {
- VM_OBJECT_UNLOCK(shared_page_obj);
- s = sf_buf_alloc(m, SFB_DEFAULT);
- sk = sf_buf_kva(s);
- bcopy(data, (void *)(sk + res), size);
- shared_page_free = res + size;
- sf_buf_free(s);
- VM_OBJECT_LOCK(shared_page_obj);
- }
- vm_page_wakeup(m);
- VM_OBJECT_UNLOCK(shared_page_obj);
- return (res);
-}
-
-static void
-shared_page_init(void *dummy __unused)
-{
- vm_page_t m;
-
- shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
- VM_PROT_DEFAULT, 0, NULL);
- VM_OBJECT_LOCK(shared_page_obj);
- m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY |
- VM_ALLOC_ZERO);
- m->valid = VM_PAGE_BITS_ALL;
- VM_OBJECT_UNLOCK(shared_page_obj);
-}
-
-SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init,
- NULL);
-
-void
-exec_sysvec_init(void *param)
-{
- struct sysentvec *sv;
-
- sv = (struct sysentvec *)param;
-
- if ((sv->sv_flags & SV_SHP) == 0)
- return;
- sv->sv_shared_page_obj = shared_page_obj;
- sv->sv_sigcode_base = sv->sv_shared_page_base +
- shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode);
-}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_fork.c
--- a/head/sys/kern/kern_fork.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_fork.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_fork.c 232240 2012-02-27 21:10:10Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_fork.c 237276 2012-06-19 22:21:59Z pjd $");
#include "opt_kdtrace.h"
#include "opt_ktrace.h"
@@ -475,7 +475,6 @@
bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
td2->td_sigstk = td->td_sigstk;
- td2->td_sigmask = td->td_sigmask;
td2->td_flags = TDF_INMEM;
td2->td_lend_user_pri = PRI_MAX;
@@ -922,8 +921,10 @@
*/
*procp = newproc;
#ifdef PROCDESC
- if (flags & RFPROCDESC)
+ if (flags & RFPROCDESC) {
procdesc_finit(newproc->p_procdesc, fp_procdesc);
+ fdrop(fp_procdesc, td);
+ }
#endif
racct_proc_fork_done(newproc);
return (0);
@@ -939,14 +940,16 @@
#ifdef MAC
mac_proc_destroy(newproc);
#endif
+ racct_proc_exit(newproc);
fail1:
- racct_proc_exit(newproc);
if (vm2 != NULL)
vmspace_free(vm2);
uma_zfree(proc_zone, newproc);
#ifdef PROCDESC
- if (((flags & RFPROCDESC) != 0) && (fp_procdesc != NULL))
+ if (((flags & RFPROCDESC) != 0) && (fp_procdesc != NULL)) {
+ fdclose(td->td_proc->p_fd, fp_procdesc, *procdescp, td);
fdrop(fp_procdesc, td);
+ }
#endif
pause("fork", hz / 2);
return (error);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_jail.c
--- a/head/sys/kern/kern_jail.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_jail.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 232598 2012-03-06 11:05:50Z trasz $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 235803 2012-05-22 19:43:20Z trasz $");
#include "opt_compat.h"
#include "opt_ddb.h"
@@ -1811,6 +1811,16 @@
}
}
+#ifdef RACCT
+ if (!created) {
+ sx_sunlock(&allprison_lock);
+ prison_racct_modify(pr);
+ sx_slock(&allprison_lock);
+ }
+#endif
+
+ td->td_retval[0] = pr->pr_id;
+
/*
* Now that it is all there, drop the temporary reference from existing
* prisons. Or add a reference to newly created persistent prisons
@@ -1832,12 +1842,6 @@
sx_sunlock(&allprison_lock);
}
-#ifdef RACCT
- if (!created)
- prison_racct_modify(pr);
-#endif
-
- td->td_retval[0] = pr->pr_id;
goto done_errmsg;
done_deref_locked:
@@ -4491,8 +4495,11 @@
sx_slock(&allproc_lock);
sx_xlock(&allprison_lock);
- if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0)
+ if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) {
+ sx_xunlock(&allprison_lock);
+ sx_sunlock(&allproc_lock);
return;
+ }
oldprr = pr->pr_prison_racct;
pr->pr_prison_racct = NULL;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_kthread.c
--- a/head/sys/kern/kern_kthread.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_kthread.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_kthread.c 232700 2012-03-08 19:41:05Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_kthread.c 236117 2012-05-26 20:03:47Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -271,7 +271,6 @@
bzero(&newtd->td_startzero,
__rangeof(struct thread, td_startzero, td_endzero));
-/* XXX check if we should zero. */
bcopy(&oldtd->td_startcopy, &newtd->td_startcopy,
__rangeof(struct thread, td_startcopy, td_endcopy));
@@ -295,7 +294,6 @@
/* this code almost the same as create_thread() in kern_thr.c */
PROC_LOCK(p);
p->p_flag |= P_HADTHREADS;
- newtd->td_sigmask = oldtd->td_sigmask; /* XXX dubious */
thread_link(newtd, p);
thread_lock(oldtd);
/* let the scheduler know about these things. */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_malloc.c
--- a/head/sys/kern/kern_malloc.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_malloc.c Wed Jul 25 16:40:53 2012 +0300
@@ -43,7 +43,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_malloc.c 232356 2012-03-01 19:58:34Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_malloc.c 238502 2012-07-15 20:29:48Z mdf $");
#include "opt_ddb.h"
#include "opt_kdtrace.h"
@@ -744,7 +744,7 @@
vm_kmem_size = 2 * mem_size * PAGE_SIZE;
#ifdef DEBUG_MEMGUARD
- tmp = memguard_fudge(vm_kmem_size, vm_kmem_size_max);
+ tmp = memguard_fudge(vm_kmem_size, kernel_map);
#else
tmp = vm_kmem_size;
#endif
@@ -1000,6 +1000,8 @@
db_printf("%18s %12ju %12juK %12ju\n",
mtp->ks_shortdesc, allocs - frees,
(alloced - freed + 1023) / 1024, allocs);
+ if (db_pager_quit)
+ break;
}
}
@@ -1029,6 +1031,8 @@
if (mtip->mti_zone != subzone)
continue;
db_printf("%s\n", mtp->ks_shortdesc);
+ if (db_pager_quit)
+ break;
}
}
#endif /* MALLOC_DEBUG_MAXZONES > 1 */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_proc.c
--- a/head/sys/kern/kern_proc.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_proc.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_proc.c 233389 2012-03-23 20:05:41Z trociny $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_proc.c 238527 2012-07-16 09:38:19Z pgj $");
#include "opt_compat.h"
#include "opt_ddb.h"
@@ -309,6 +309,30 @@
return (p);
}
+static struct proc *
+pfind_tid(pid_t tid)
+{
+ struct proc *p;
+ struct thread *td;
+
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_tid == tid)
+ goto found;
+ }
+ PROC_UNLOCK(p);
+ }
+found:
+ sx_sunlock(&allproc_lock);
+ return (p);
+}
+
/*
* Locate a process group by number.
* The caller must hold proctree_lock.
@@ -339,7 +363,12 @@
struct proc *p;
int error;
- p = pfind(pid);
+ if (pid <= PID_MAX)
+ p = pfind(pid);
+ else if ((flags & PGET_NOTID) == 0)
+ p = pfind_tid(pid);
+ else
+ p = NULL;
if (p == NULL)
return (ESRCH);
if ((flags & PGET_CANSEE) != 0) {
@@ -849,6 +878,9 @@
kp->ki_childtime = kp->ki_childstime;
timevaladd(&kp->ki_childtime, &kp->ki_childutime);
+ FOREACH_THREAD_IN_PROC(p, td0)
+ kp->ki_cow += td0->td_cow;
+
tp = NULL;
if (p->p_pgrp) {
kp->ki_pgid = p->p_pgrp->pg_id;
@@ -961,6 +993,7 @@
kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
kp->ki_pctcpu = sched_pctcpu(td);
kp->ki_estcpu = td->td_estcpu;
+ kp->ki_cow = td->td_cow;
}
/* We can't get this anymore but ps etc never used it anyway. */
@@ -1103,6 +1136,7 @@
CP(*ki, *ki32, ki_estcpu);
CP(*ki, *ki32, ki_slptime);
CP(*ki, *ki32, ki_swtime);
+ CP(*ki, *ki32, ki_cow);
CP(*ki, *ki32, ki_runtime);
TV_CP(*ki, *ki32, ki_start);
TV_CP(*ki, *ki32, ki_childtime);
@@ -2155,6 +2189,10 @@
kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
+ if (entry->eflags & MAP_ENTRY_GROWS_UP)
+ kve->kve_flags |= KVME_FLAG_GROWS_UP;
+ if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
+ kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
last_timestamp = map->timestamp;
vm_map_unlock_read(map);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_racct.c
--- a/head/sys/kern/kern_racct.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_racct.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,11 +26,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/kern/kern_racct.c 234383 2012-04-17 14:31:02Z trasz $
+ * $FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 234383 2012-04-17 14:31:02Z trasz $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $");
#include "opt_kdtrace.h"
@@ -573,6 +573,9 @@
PROC_UNLOCK(child);
PROC_UNLOCK(parent);
+ if (error != 0)
+ racct_proc_exit(child);
+
return (error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_rangelock.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/kern/kern_rangelock.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,246 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov <kib at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/kern/kern_rangelock.c 236317 2012-05-30 16:06:38Z kib $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rangelock.h>
+#include <sys/systm.h>
+
+#include <vm/uma.h>
+
+struct rl_q_entry {
+ TAILQ_ENTRY(rl_q_entry) rl_q_link;
+ off_t rl_q_start, rl_q_end;
+ int rl_q_flags;
+};
+
+static uma_zone_t rl_entry_zone;
+
+static void
+rangelock_sys_init(void)
+{
+
+ rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
+
+static struct rl_q_entry *
+rlqentry_alloc(void)
+{
+
+ return (uma_zalloc(rl_entry_zone, M_WAITOK));
+}
+
+void
+rlqentry_free(struct rl_q_entry *rleq)
+{
+
+ uma_zfree(rl_entry_zone, rleq);
+}
+
+void
+rangelock_init(struct rangelock *lock)
+{
+
+ TAILQ_INIT(&lock->rl_waiters);
+ lock->rl_currdep = NULL;
+}
+
+void
+rangelock_destroy(struct rangelock *lock)
+{
+
+ KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
+}
+
+/*
+ * Verifies the supplied rl_q_entries for compatibility. Returns true
+ * if the rangelock queue entries are not compatible, false if they are.
+ *
+ * Two entries are compatible if their ranges do not overlap, or both
+ * entries are for read.
+ */
+static int
+rangelock_incompatible(const struct rl_q_entry *e1,
+ const struct rl_q_entry *e2)
+{
+
+ if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
+ (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
+ return (0);
+ if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
+ return (1);
+ return (0);
+}
+
+/*
+ * Recalculate the lock->rl_currdep after an unlock.
+ */
+static void
+rangelock_calc_block(struct rangelock *lock)
+{
+ struct rl_q_entry *entry, *entry1, *whead;
+
+ if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
+ lock->rl_currdep != NULL)
+ lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
+ for (entry = lock->rl_currdep; entry != NULL;
+ entry = TAILQ_NEXT(entry, rl_q_link)) {
+ TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
+ if (rangelock_incompatible(entry, entry1))
+ goto out;
+ if (entry1 == entry)
+ break;
+ }
+ }
+out:
+ lock->rl_currdep = entry;
+ TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
+ if (whead == lock->rl_currdep)
+ break;
+ if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
+ whead->rl_q_flags |= RL_LOCK_GRANTED;
+ wakeup(whead);
+ }
+ }
+}
+
+static void
+rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
+ struct mtx *ilk)
+{
+
+ MPASS(lock != NULL && entry != NULL && ilk != NULL);
+ mtx_assert(ilk, MA_OWNED);
+ KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
+
+ TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
+ rangelock_calc_block(lock);
+ mtx_unlock(ilk);
+ if (curthread->td_rlqe == NULL)
+ curthread->td_rlqe = entry;
+ else
+ rlqentry_free(entry);
+}
+
+void
+rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
+{
+
+ MPASS(lock != NULL && cookie != NULL && ilk != NULL);
+
+ mtx_lock(ilk);
+ rangelock_unlock_locked(lock, cookie, ilk);
+}
+
+/*
+ * Unlock the sub-range of granted lock.
+ */
+void *
+rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
+ off_t end, struct mtx *ilk)
+{
+ struct rl_q_entry *entry;
+
+ MPASS(lock != NULL && cookie != NULL && ilk != NULL);
+ entry = cookie;
+ KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
+ ("Unlocking non-granted lock"));
+ KASSERT(entry->rl_q_start == start, ("wrong start"));
+ KASSERT(entry->rl_q_end >= end, ("wrong end"));
+
+ mtx_lock(ilk);
+ if (entry->rl_q_end == end) {
+ rangelock_unlock_locked(lock, cookie, ilk);
+ return (NULL);
+ }
+ entry->rl_q_end = end;
+ rangelock_calc_block(lock);
+ mtx_unlock(ilk);
+ return (cookie);
+}
+
+/*
+ * Add the lock request to the queue of the pending requests for
+ * rangelock. Sleep until the request can be granted.
+ */
+static void *
+rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
+ struct mtx *ilk)
+{
+ struct rl_q_entry *entry;
+ struct thread *td;
+
+ MPASS(lock != NULL && ilk != NULL);
+
+ td = curthread;
+ if (td->td_rlqe != NULL) {
+ entry = td->td_rlqe;
+ td->td_rlqe = NULL;
+ } else
+ entry = rlqentry_alloc();
+ MPASS(entry != NULL);
+ entry->rl_q_flags = mode;
+ entry->rl_q_start = start;
+ entry->rl_q_end = end;
+
+ mtx_lock(ilk);
+ /*
+ * XXXKIB TODO. Check that a thread does not try to enqueue a
+ * lock that is incompatible with another request from the same
+ * thread.
+ */
+
+ TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
+ if (lock->rl_currdep == NULL)
+ lock->rl_currdep = entry;
+ rangelock_calc_block(lock);
+ while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
+ msleep(entry, ilk, 0, "range", 0);
+ mtx_unlock(ilk);
+ return (entry);
+}
+
+void *
+rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+{
+
+ return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
+}
+
+void *
+rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+{
+
+ return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_sharedpage.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/kern/kern_sharedpage.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,240 @@
+/*-
+ * Copyright (c) 2010, 2012 Konstantin Belousov <kib at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/kern/kern_sharedpage.c 237477 2012-06-23 10:15:23Z kib $");
+
+#include "opt_compat.h"
+#include "opt_vm.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysent.h>
+#include <sys/sysctl.h>
+#include <sys/vdso.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+static struct sx shared_page_alloc_sx;
+static vm_object_t shared_page_obj;
+static int shared_page_free;
+char *shared_page_mapping;
+
+void
+shared_page_write(int base, int size, const void *data)
+{
+
+ bcopy(data, shared_page_mapping + base, size);
+}
+
+static int
+shared_page_alloc_locked(int size, int align)
+{
+ int res;
+
+ res = roundup(shared_page_free, align);
+ if (res + size >= IDX_TO_OFF(shared_page_obj->size))
+ res = -1;
+ else
+ shared_page_free = res + size;
+ return (res);
+}
+
+int
+shared_page_alloc(int size, int align)
+{
+ int res;
+
+ sx_xlock(&shared_page_alloc_sx);
+ res = shared_page_alloc_locked(size, align);
+ sx_xunlock(&shared_page_alloc_sx);
+ return (res);
+}
+
+int
+shared_page_fill(int size, int align, const void *data)
+{
+ int res;
+
+ sx_xlock(&shared_page_alloc_sx);
+ res = shared_page_alloc_locked(size, align);
+ if (res != -1)
+ shared_page_write(res, size, data);
+ sx_xunlock(&shared_page_alloc_sx);
+ return (res);
+}
+
+static void
+shared_page_init(void *dummy __unused)
+{
+ vm_page_t m;
+ vm_offset_t addr;
+
+ sx_init(&shared_page_alloc_sx, "shpsx");
+ shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
+ VM_PROT_DEFAULT, 0, NULL);
+ VM_OBJECT_LOCK(shared_page_obj);
+ m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY |
+ VM_ALLOC_ZERO);
+ m->valid = VM_PAGE_BITS_ALL;
+ VM_OBJECT_UNLOCK(shared_page_obj);
+ addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+ pmap_qenter(addr, &m, 1);
+ shared_page_mapping = (char *)addr;
+}
+
+SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init,
+ NULL);
+
+static void
+timehands_update(struct sysentvec *sv)
+{
+ struct vdso_timehands th;
+ struct vdso_timekeep *tk;
+ uint32_t enabled, idx;
+
+ enabled = tc_fill_vdso_timehands(&th);
+ tk = (struct vdso_timekeep *)(shared_page_mapping +
+ sv->sv_timekeep_off);
+ idx = sv->sv_timekeep_curr;
+ atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0);
+ if (++idx >= VDSO_TH_NUM)
+ idx = 0;
+ sv->sv_timekeep_curr = idx;
+ if (++sv->sv_timekeep_gen == 0)
+ sv->sv_timekeep_gen = 1;
+ th.th_gen = 0;
+ if (enabled)
+ tk->tk_th[idx] = th;
+ tk->tk_enabled = enabled;
+ atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen);
+ tk->tk_current = idx;
+}
+
+#ifdef COMPAT_FREEBSD32
+static void
+timehands_update32(struct sysentvec *sv)
+{
+ struct vdso_timekeep32 *tk;
+ struct vdso_timehands32 th;
+ uint32_t enabled, idx;
+
+ enabled = tc_fill_vdso_timehands32(&th);
+ tk = (struct vdso_timekeep32 *)(shared_page_mapping +
+ sv->sv_timekeep_off);
+ idx = sv->sv_timekeep_curr;
+ atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0);
+ if (++idx >= VDSO_TH_NUM)
+ idx = 0;
+ sv->sv_timekeep_curr = idx;
+ if (++sv->sv_timekeep_gen == 0)
+ sv->sv_timekeep_gen = 1;
+ th.th_gen = 0;
+ if (enabled)
+ tk->tk_th[idx] = th;
+ tk->tk_enabled = enabled;
+ atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen);
+ tk->tk_current = idx;
+}
+#endif
+
+/*
+ * This is hackish, but easiest way to avoid creating list structures
+ * that needs to be iterated over from the hardclock interrupt
+ * context.
+ */
+static struct sysentvec *host_sysentvec;
+#ifdef COMPAT_FREEBSD32
+static struct sysentvec *compat32_sysentvec;
+#endif
+
+void
+timekeep_push_vdso(void)
+{
+
+ if (host_sysentvec != NULL && host_sysentvec->sv_timekeep_base != 0)
+ timehands_update(host_sysentvec);
+#ifdef COMPAT_FREEBSD32
+ if (compat32_sysentvec != NULL &&
+ compat32_sysentvec->sv_timekeep_base != 0)
+ timehands_update32(compat32_sysentvec);
+#endif
+}
+
+void
+exec_sysvec_init(void *param)
+{
+ struct sysentvec *sv;
+ int tk_base;
+ uint32_t tk_ver;
+
+ sv = (struct sysentvec *)param;
+
+ if ((sv->sv_flags & SV_SHP) == 0)
+ return;
+ sv->sv_shared_page_obj = shared_page_obj;
+ sv->sv_sigcode_base = sv->sv_shared_page_base +
+ shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode);
+ if ((sv->sv_flags & SV_ABI_MASK) != SV_ABI_FREEBSD)
+ return;
+ tk_ver = VDSO_TK_VER_CURR;
+#ifdef COMPAT_FREEBSD32
+ if ((sv->sv_flags & SV_ILP32) != 0) {
+ tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) +
+ sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16);
+ KASSERT(tk_base != -1, ("tk_base -1 for 32bit"));
+ shared_page_write(tk_base + offsetof(struct vdso_timekeep32,
+ tk_ver), sizeof(uint32_t), &tk_ver);
+ KASSERT(compat32_sysentvec == 0,
+ ("Native compat32 already registered"));
+ compat32_sysentvec = sv;
+ } else {
+#endif
+ tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) +
+ sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16);
+ KASSERT(tk_base != -1, ("tk_base -1 for native"));
+ shared_page_write(tk_base + offsetof(struct vdso_timekeep,
+ tk_ver), sizeof(uint32_t), &tk_ver);
+ KASSERT(host_sysentvec == 0, ("Native already registered"));
+ host_sysentvec = sv;
+#ifdef COMPAT_FREEBSD32
+ }
+#endif
+ sv->sv_timekeep_base = sv->sv_shared_page_base + tk_base;
+ sv->sv_timekeep_off = tk_base;
+ timekeep_push_vdso();
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_shutdown.c
--- a/head/sys/kern/kern_shutdown.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_shutdown.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_shutdown.c 230643 2012-01-28 14:00:21Z attilio $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_shutdown.c 236503 2012-06-03 08:01:12Z avg $");
#include "opt_ddb.h"
#include "opt_kdb.h"
@@ -66,9 +66,7 @@
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
-#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
-#endif
#include <ddb/ddb.h>
@@ -151,7 +149,7 @@
/* Context information for dump-debuggers. */
static struct pcb dumppcb; /* Registers. */
-static lwpid_t dumptid; /* Thread ID. */
+lwpid_t dumptid; /* Thread ID. */
static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
@@ -334,9 +332,7 @@
waittime = 0;
-#ifdef SW_WATCHDOG
wdog_kern_pat(WD_LASTVAL);
-#endif
sys_sync(curthread, NULL);
/*
@@ -362,9 +358,8 @@
if (nbusy < pbusy)
iter = 0;
pbusy = nbusy;
-#ifdef SW_WATCHDOG
+
wdog_kern_pat(WD_LASTVAL);
-#endif
sys_sync(curthread, NULL);
#ifdef PREEMPTION
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_sig.c
--- a/head/sys/kern/kern_sig.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_sig.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_sig.c 234172 2012-04-12 10:48:43Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_sig.c 238336 2012-07-10 05:45:13Z davidxu $");
#include "opt_compat.h"
#include "opt_kdtrace.h"
@@ -2436,9 +2436,10 @@
}
stopme:
thread_suspend_switch(td);
- if (!(p->p_flag & P_TRACED)) {
+ if (p->p_xthread == td)
+ p->p_xthread = NULL;
+ if (!(p->p_flag & P_TRACED))
break;
- }
if (td->td_dbgflags & TDB_SUSPEND) {
if (p->p_flag & P_SINGLE_EXIT)
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_synch.c
--- a/head/sys/kern/kern_synch.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_synch.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,8 +35,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_synch.c 234494 2012-04-20 15:32:36Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_synch.c 235459 2012-05-15 01:30:25Z rstone $");
+#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_sched.h"
@@ -51,6 +52,7 @@
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
@@ -105,6 +107,20 @@
static void loadav(void *arg);
+SDT_PROVIDER_DECLARE(sched);
+SDT_PROBE_DEFINE(sched, , , preempt, preempt);
+
+/*
+ * These probes reference Solaris features that are not implemented in FreeBSD.
+ * Create the probes anyway for compatibility with existing D scripts; they'll
+ * just never fire.
+ */
+SDT_PROBE_DEFINE(sched, , , cpucaps_sleep, cpucaps-sleep);
+SDT_PROBE_DEFINE(sched, , , cpucaps_wakeup, cpucaps-wakeup);
+SDT_PROBE_DEFINE(sched, , , schedctl_nopreempt, schedctl-nopreempt);
+SDT_PROBE_DEFINE(sched, , , schedctl_preempt, schedctl-preempt);
+SDT_PROBE_DEFINE(sched, , , schedctl_yield, schedctl-yield);
+
void
sleepinit(void)
{
@@ -462,6 +478,7 @@
"prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
"lockname:\"%s\"", td->td_lockname);
#endif
+ SDT_PROBE0(sched, , , preempt);
#ifdef XEN
PT_UPDATES_FLUSH();
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_tc.c
--- a/head/sys/kern/kern_tc.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_tc.c Wed Jul 25 16:40:53 2012 +0300
@@ -14,8 +14,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_tc.c 232449 2012-03-03 08:19:18Z jmallett $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_tc.c 238537 2012-07-16 20:17:19Z gnn $");
+#include "opt_compat.h"
#include "opt_ntp.h"
#include "opt_ffclock.h"
@@ -32,6 +33,7 @@
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
+#include <sys/vdso.h>
/*
* A large step happens on boot. This constant detects such steps.
@@ -120,6 +122,8 @@
static void tc_windup(void);
static void cpu_tick_calibrate(int);
+void dtrace_getnanotime(struct timespec *tsp);
+
static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
{
@@ -958,6 +962,24 @@
#endif /* FFCLOCK */
/*
+ * This is a clone of getnanotime and used for walltimestamps.
+ * The dtrace_ prefix prevents fbt from creating probes for
+ * it so walltimestamp can be safely used in all fbt probes.
+ */
+void
+dtrace_getnanotime(struct timespec *tsp)
+{
+ struct timehands *th;
+ u_int gen;
+
+ do {
+ th = timehands;
+ gen = th->th_generation;
+ *tsp = th->th_nanotime;
+ } while (gen == 0 || gen != th->th_generation);
+}
+
+/*
* System clock currently providing time to the system. Modifiable via sysctl
* when the FFCLOCK option is defined.
*/
@@ -1360,6 +1382,7 @@
#endif
timehands = th;
+ timekeep_push_vdso();
}
/* Report or change the active timecounter hardware. */
@@ -1386,6 +1409,7 @@
(void)newtc->tc_get_timecount(newtc);
timecounter = newtc;
+ timekeep_push_vdso();
return (0);
}
return (EINVAL);
@@ -1844,3 +1868,63 @@
}
cpu_tick_f *cpu_ticks = tc_cpu_ticks;
+
+static int vdso_th_enable = 1;
+static int
+sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
+{
+ int old_vdso_th_enable, error;
+
+ old_vdso_th_enable = vdso_th_enable;
+ error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
+ if (error != 0)
+ return (error);
+ vdso_th_enable = old_vdso_th_enable;
+ timekeep_push_vdso();
+ return (0);
+}
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");
+
+uint32_t
+tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
+{
+ struct timehands *th;
+ uint32_t enabled;
+
+ th = timehands;
+ vdso_th->th_algo = VDSO_TH_ALGO_1;
+ vdso_th->th_scale = th->th_scale;
+ vdso_th->th_offset_count = th->th_offset_count;
+ vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
+ vdso_th->th_offset = th->th_offset;
+ vdso_th->th_boottime = boottimebin;
+ enabled = cpu_fill_vdso_timehands(vdso_th);
+ if (!vdso_th_enable)
+ enabled = 0;
+ return (enabled);
+}
+
+#ifdef COMPAT_FREEBSD32
+uint32_t
+tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
+{
+ struct timehands *th;
+ uint32_t enabled;
+
+ th = timehands;
+ vdso_th32->th_algo = VDSO_TH_ALGO_1;
+ *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
+ vdso_th32->th_offset_count = th->th_offset_count;
+ vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
+ vdso_th32->th_offset.sec = th->th_offset.sec;
+ *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
+ vdso_th32->th_boottime.sec = boottimebin.sec;
+ *(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin.frac;
+ enabled = cpu_fill_vdso_timehands32(vdso_th32);
+ if (!vdso_th_enable)
+ enabled = 0;
+ return (enabled);
+}
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_thr.c
--- a/head/sys/kern/kern_thr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_thr.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_thr.c 234381 2012-04-17 13:44:40Z trasz $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_thr.c 236117 2012-05-26 20:03:47Z kib $");
#include "opt_compat.h"
#include "opt_posix.h"
@@ -252,7 +252,6 @@
PROC_LOCK(td->td_proc);
td->td_proc->p_flag |= P_HADTHREADS;
- newtd->td_sigmask = td->td_sigmask;
thread_link(newtd, p);
bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
thread_lock(td);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_thread.c
--- a/head/sys/kern/kern_thread.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_thread.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,10 +27,11 @@
*/
#include "opt_witness.h"
+#include "opt_kdtrace.h"
#include "opt_hwpmc_hooks.h"
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 229429 2012-01-03 21:03:28Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 236317 2012-05-30 16:06:38Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -38,7 +39,9 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rangelock.h>
#include <sys/resourcevar.h>
+#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
@@ -59,6 +62,10 @@
#include <vm/uma.h>
#include <sys/eventhandler.h>
+SDT_PROVIDER_DECLARE(proc);
+SDT_PROBE_DEFINE(proc, , , lwp_exit, lwp-exit);
+
+
/*
* thread related storage.
*/
@@ -199,6 +206,7 @@
td->td_sleepqueue = sleepq_alloc();
td->td_turnstile = turnstile_alloc();
+ td->td_rlqe = NULL;
EVENTHANDLER_INVOKE(thread_init, td);
td->td_sched = (struct td_sched *)&td[1];
umtx_thread_init(td);
@@ -216,6 +224,7 @@
td = (struct thread *)mem;
EVENTHANDLER_INVOKE(thread_fini, td);
+ rlqentry_free(td->td_rlqe);
turnstile_free(td->td_turnstile);
sleepq_free(td->td_sleepqueue);
umtx_thread_fini(td);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/kern_timeout.c
--- a/head/sys/kern/kern_timeout.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/kern_timeout.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 227293 2011-11-07 06:44:47Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 234981 2012-05-03 20:00:30Z kib $");
#include "opt_kdtrace.h"
@@ -437,6 +437,181 @@
}
}
+static void
+callout_cc_del(struct callout *c, struct callout_cpu *cc)
+{
+
+ if (cc->cc_next == c)
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+ }
+}
+
+static struct callout *
+softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
+ int *lockcalls, int *gcalls)
+{
+ void (*c_func)(void *);
+ void *c_arg;
+ struct lock_class *class;
+ struct lock_object *c_lock;
+ int c_flags, sharedlock;
+#ifdef SMP
+ struct callout_cpu *new_cc;
+ void (*new_func)(void *);
+ void *new_arg;
+ int new_cpu, new_ticks;
+#endif
+#ifdef DIAGNOSTIC
+ struct bintime bt1, bt2;
+ struct timespec ts2;
+ static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
+ static timeout_t *lastfunc;
+#endif
+
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
+ sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+ c_lock = c->c_lock;
+ c_func = c->c_func;
+ c_arg = c->c_arg;
+ c_flags = c->c_flags;
+ if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+ c->c_flags = CALLOUT_LOCAL_ALLOC;
+ else
+ c->c_flags &= ~CALLOUT_PENDING;
+ cc->cc_curr = c;
+ cc->cc_cancel = 0;
+ CC_UNLOCK(cc);
+ if (c_lock != NULL) {
+ class->lc_lock(c_lock, sharedlock);
+ /*
+ * The callout may have been cancelled
+ * while we switched locks.
+ */
+ if (cc->cc_cancel) {
+ class->lc_unlock(c_lock);
+ goto skip;
+ }
+ /* The callout cannot be stopped now. */
+ cc->cc_cancel = 1;
+
+ if (c_lock == &Giant.lock_object) {
+ (*gcalls)++;
+ CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+ c, c_func, c_arg);
+ } else {
+ (*lockcalls)++;
+ CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
+ c, c_func, c_arg);
+ }
+ } else {
+ (*mpcalls)++;
+ CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+ c, c_func, c_arg);
+ }
+#ifdef DIAGNOSTIC
+ binuptime(&bt1);
+#endif
+ THREAD_NO_SLEEPING();
+ SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
+ c_func(c_arg);
+ SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
+ THREAD_SLEEPING_OK();
+#ifdef DIAGNOSTIC
+ binuptime(&bt2);
+ bintime_sub(&bt2, &bt1);
+ if (bt2.frac > maxdt) {
+ if (lastfunc != c_func || bt2.frac > maxdt * 2) {
+ bintime2timespec(&bt2, &ts2);
+ printf(
+ "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
+ c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
+ }
+ maxdt = bt2.frac;
+ lastfunc = c_func;
+ }
+#endif
+ CTR1(KTR_CALLOUT, "callout %p finished", c);
+ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+ class->lc_unlock(c_lock);
+skip:
+ CC_LOCK(cc);
+ /*
+ * If the current callout is locally allocated (from
+ * timeout(9)) then put it on the freelist.
+ *
+ * Note: we need to check the cached copy of c_flags because
+ * if it was not local, then it's not safe to deref the
+ * callout pointer.
+ */
+ if (c_flags & CALLOUT_LOCAL_ALLOC) {
+ KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
+ ("corrupted callout"));
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+ }
+ cc->cc_curr = NULL;
+ if (cc->cc_waiting) {
+ /*
+ * There is someone waiting for the
+ * callout to complete.
+ * If the callout was scheduled for
+ * migration just cancel it.
+ */
+ if (cc_cme_migrating(cc))
+ cc_cme_cleanup(cc);
+ cc->cc_waiting = 0;
+ CC_UNLOCK(cc);
+ wakeup(&cc->cc_waiting);
+ CC_LOCK(cc);
+ } else if (cc_cme_migrating(cc)) {
+#ifdef SMP
+ /*
+ * If the callout was scheduled for
+ * migration just perform it now.
+ */
+ new_cpu = cc->cc_migration_cpu;
+ new_ticks = cc->cc_migration_ticks;
+ new_func = cc->cc_migration_func;
+ new_arg = cc->cc_migration_arg;
+ cc_cme_cleanup(cc);
+
+ /*
+ * Handle deferred callout stops
+ */
+ if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
+ CTR3(KTR_CALLOUT,
+ "deferred cancelled %p func %p arg %p",
+ c, new_func, new_arg);
+ callout_cc_del(c, cc);
+ goto nextc;
+ }
+
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+
+ /*
+ * It should be assert here that the
+ * callout is not destroyed but that
+ * is not easy.
+ */
+ new_cc = callout_cpu_switch(c, cc, new_cpu);
+ callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
+ new_cpu);
+ CC_UNLOCK(new_cc);
+ CC_LOCK(cc);
+#else
+ panic("migration should not happen");
+#endif
+ }
+#ifdef SMP
+nextc:
+#endif
+ return (cc->cc_next);
+}
+
/*
* The callout mechanism is based on the work of Adam M. Costello and
* George Varghese, published in a technical report entitled "Redesigning
@@ -465,12 +640,6 @@
int mpcalls;
int lockcalls;
int gcalls;
-#ifdef DIAGNOSTIC
- struct bintime bt1, bt2;
- struct timespec ts2;
- static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
- static timeout_t *lastfunc;
-#endif
#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
@@ -492,7 +661,7 @@
cc->cc_softticks++;
bucket = &cc->cc_callwheel[curticks & callwheelmask];
c = TAILQ_FIRST(bucket);
- while (c) {
+ while (c != NULL) {
depth++;
if (c->c_time != curticks) {
c = TAILQ_NEXT(c, c_links.tqe);
@@ -507,160 +676,10 @@
steps = 0;
}
} else {
- void (*c_func)(void *);
- void *c_arg;
- struct lock_class *class;
- struct lock_object *c_lock;
- int c_flags, sharedlock;
-
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(bucket, c, c_links.tqe);
- class = (c->c_lock != NULL) ?
- LOCK_CLASS(c->c_lock) : NULL;
- sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ?
- 0 : 1;
- c_lock = c->c_lock;
- c_func = c->c_func;
- c_arg = c->c_arg;
- c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_flags = CALLOUT_LOCAL_ALLOC;
- } else {
- c->c_flags =
- (c->c_flags & ~CALLOUT_PENDING);
- }
- cc->cc_curr = c;
- cc->cc_cancel = 0;
- CC_UNLOCK(cc);
- if (c_lock != NULL) {
- class->lc_lock(c_lock, sharedlock);
- /*
- * The callout may have been cancelled
- * while we switched locks.
- */
- if (cc->cc_cancel) {
- class->lc_unlock(c_lock);
- goto skip;
- }
- /* The callout cannot be stopped now. */
- cc->cc_cancel = 1;
-
- if (c_lock == &Giant.lock_object) {
- gcalls++;
- CTR3(KTR_CALLOUT,
- "callout %p func %p arg %p",
- c, c_func, c_arg);
- } else {
- lockcalls++;
- CTR3(KTR_CALLOUT, "callout lock"
- " %p func %p arg %p",
- c, c_func, c_arg);
- }
- } else {
- mpcalls++;
- CTR3(KTR_CALLOUT,
- "callout mpsafe %p func %p arg %p",
- c, c_func, c_arg);
- }
-#ifdef DIAGNOSTIC
- binuptime(&bt1);
-#endif
- THREAD_NO_SLEEPING();
- SDT_PROBE(callout_execute, kernel, ,
- callout_start, c, 0, 0, 0, 0);
- c_func(c_arg);
- SDT_PROBE(callout_execute, kernel, ,
- callout_end, c, 0, 0, 0, 0);
- THREAD_SLEEPING_OK();
-#ifdef DIAGNOSTIC
- binuptime(&bt2);
- bintime_sub(&bt2, &bt1);
- if (bt2.frac > maxdt) {
- if (lastfunc != c_func ||
- bt2.frac > maxdt * 2) {
- bintime2timespec(&bt2, &ts2);
- printf(
- "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
- c_func, c_arg,
- (intmax_t)ts2.tv_sec,
- ts2.tv_nsec);
- }
- maxdt = bt2.frac;
- lastfunc = c_func;
- }
-#endif
- CTR1(KTR_CALLOUT, "callout %p finished", c);
- if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
- class->lc_unlock(c_lock);
- skip:
- CC_LOCK(cc);
- /*
- * If the current callout is locally
- * allocated (from timeout(9))
- * then put it on the freelist.
- *
- * Note: we need to check the cached
- * copy of c_flags because if it was not
- * local, then it's not safe to deref the
- * callout pointer.
- */
- if (c_flags & CALLOUT_LOCAL_ALLOC) {
- KASSERT(c->c_flags ==
- CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c,
- c_links.sle);
- }
- cc->cc_curr = NULL;
- if (cc->cc_waiting) {
-
- /*
- * There is someone waiting for the
- * callout to complete.
- * If the callout was scheduled for
- * migration just cancel it.
- */
- if (cc_cme_migrating(cc))
- cc_cme_cleanup(cc);
- cc->cc_waiting = 0;
- CC_UNLOCK(cc);
- wakeup(&cc->cc_waiting);
- CC_LOCK(cc);
- } else if (cc_cme_migrating(cc)) {
-#ifdef SMP
- struct callout_cpu *new_cc;
- void (*new_func)(void *);
- void *new_arg;
- int new_cpu, new_ticks;
-
- /*
- * If the callout was scheduled for
- * migration just perform it now.
- */
- new_cpu = cc->cc_migration_cpu;
- new_ticks = cc->cc_migration_ticks;
- new_func = cc->cc_migration_func;
- new_arg = cc->cc_migration_arg;
- cc_cme_cleanup(cc);
-
- /*
- * It should be assert here that the
- * callout is not destroyed but that
- * is not easy.
- */
- new_cc = callout_cpu_switch(c, cc,
- new_cpu);
- callout_cc_add(c, new_cc, new_ticks,
- new_func, new_arg, new_cpu);
- CC_UNLOCK(new_cc);
- CC_LOCK(cc);
-#else
- panic("migration should not happen");
-#endif
- }
+ c = softclock_call_cc(c, cc, &mpcalls,
+ &lockcalls, &gcalls);
steps = 0;
- c = cc->cc_next;
}
}
}
@@ -814,6 +833,7 @@
cc->cc_migration_ticks = to_ticks;
cc->cc_migration_func = ftn;
cc->cc_migration_arg = arg;
+ c->c_flags |= CALLOUT_DFRMIGRATION;
CTR5(KTR_CALLOUT,
"migration of %p func %p arg %p in %d to %u deferred",
c, c->c_func, c->c_arg, to_ticks, cpu);
@@ -984,6 +1004,12 @@
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
return (1);
+ } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+ CC_UNLOCK(cc);
+ return (1);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
@@ -996,19 +1022,12 @@
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
- if (cc->cc_next == c) {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- }
+ CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
+ c, c->c_func, c->c_arg);
TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
c_links.tqe);
+ callout_cc_del(c, cc);
- CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
- c, c->c_func, c->c_arg);
-
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
CC_UNLOCK(cc);
return (1);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sched_4bsd.c
--- a/head/sys/kern/sched_4bsd.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sched_4bsd.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 232700 2012-03-08 19:41:05Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 235471 2012-05-15 10:58:17Z pluknet $");
#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"
@@ -50,6 +50,7 @@
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
@@ -244,12 +245,31 @@
"allow threads to share a quantum");
#endif
+SDT_PROVIDER_DEFINE(sched);
+
+SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *",
+ "struct proc *", "uint8_t");
+SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *",
+ "struct proc *", "void *");
+SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *",
+ "struct proc *", "void *", "int");
+SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *",
+ "struct proc *", "uint8_t", "struct thread *");
+SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *",
+ "struct proc *");
+SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu);
+SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *",
+ "struct proc *");
+
static __inline void
sched_load_add(void)
{
sched_tdcnt++;
KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
+ SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt);
}
static __inline void
@@ -258,6 +278,7 @@
sched_tdcnt--;
KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
+ SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt);
}
/*
* Arrange to reschedule if necessary, taking the priorities and
@@ -795,10 +816,13 @@
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change",
"prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED,
sched_tdname(curthread));
+ SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio);
if (td != curthread && prio > td->td_priority) {
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
"lend prio", "prio:%d", td->td_priority, "new prio:%d",
prio, KTR_ATTR_LINKED, sched_tdname(td));
+ SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio,
+ curthread);
}
THREAD_LOCK_ASSERT(td, MA_OWNED);
if (td->td_priority == prio)
@@ -987,6 +1011,9 @@
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
+
+ SDT_PROBE2(sched, , , off_cpu, td, td->td_proc);
+
/* I feel sleepy */
lock_profile_release_lock(&sched_lock.lock_object);
#ifdef KDTRACE_HOOKS
@@ -1018,11 +1045,14 @@
* needed to, or the thread_wait() or wait() will
* need to reap it.
*/
+
+ SDT_PROBE0(sched, , , on_cpu);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
- }
+ } else
+ SDT_PROBE0(sched, , , remain_cpu);
#ifdef SMP
if (td->td_flags & TDF_IDLETD)
@@ -1223,6 +1253,8 @@
sched_tdname(curthread));
KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
KTR_ATTR_LINKED, sched_tdname(td));
+ SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL,
+ flags & SRQ_PREEMPTED);
/*
@@ -1315,6 +1347,8 @@
sched_tdname(curthread));
KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
KTR_ATTR_LINKED, sched_tdname(td));
+ SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL,
+ flags & SRQ_PREEMPTED);
/*
* Now that the thread is moving to the run-queue, set the lock
@@ -1362,6 +1396,7 @@
KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq rem",
"prio:%d", td->td_priority, KTR_ATTR_LINKED,
sched_tdname(curthread));
+ SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL);
if ((td->td_flags & TDF_NOLOAD) == 0)
sched_load_rem();
@@ -1425,6 +1460,8 @@
void
sched_preempt(struct thread *td)
{
+
+ SDT_PROBE2(sched, , , surrender, td, td->td_proc);
thread_lock(td);
if (td->td_critnest > 1)
td->td_owepreempt = 1;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sched_ule.c
--- a/head/sys/kern/sched_ule.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sched_ule.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 234066 2012-04-09 18:24:58Z mav $");
+__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 236141 2012-05-27 10:25:20Z raj $");
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
@@ -53,6 +53,7 @@
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
@@ -76,7 +77,7 @@
#include <machine/cpu.h>
#include <machine/smp.h>
-#if defined(__powerpc__) && defined(E500)
+#if defined(__powerpc__) && defined(BOOKE_E500)
#error "This architecture is not currently compatible with ULE"
#endif
@@ -327,6 +328,24 @@
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
NULL);
+SDT_PROVIDER_DEFINE(sched);
+
+SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *",
+ "struct proc *", "uint8_t");
+SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *",
+ "struct proc *", "void *");
+SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *",
+ "struct proc *", "void *", "int");
+SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *",
+ "struct proc *", "uint8_t", "struct thread *");
+SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *",
+ "struct proc *");
+SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu);
+SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *",
+ "struct proc *");
+
/*
* Print the threads waiting on a run-queue.
*/
@@ -509,6 +528,7 @@
if ((td->td_flags & TDF_NOLOAD) == 0)
tdq->tdq_sysload++;
KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
+ SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load);
}
/*
@@ -528,6 +548,7 @@
if ((td->td_flags & TDF_NOLOAD) == 0)
tdq->tdq_sysload--;
KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
+ SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load);
}
/*
@@ -1625,10 +1646,13 @@
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "prio",
"prio:%d", td->td_priority, "new prio:%d", prio,
KTR_ATTR_LINKED, sched_tdname(curthread));
+ SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio);
if (td != curthread && prio > td->td_priority) {
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
"lend prio", "prio:%d", td->td_priority, "new prio:%d",
prio, KTR_ATTR_LINKED, sched_tdname(td));
+ SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio,
+ curthread);
}
ts = td->td_sched;
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1879,6 +1903,7 @@
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
+ SDT_PROBE2(sched, , , off_cpu, td, td->td_proc);
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
sched_pctcpu_update(newtd->td_sched, 0);
@@ -1903,12 +1928,16 @@
tdq = TDQ_CPU(cpuid);
lock_profile_obtain_lock_success(
&TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
+
+ SDT_PROBE0(sched, , , on_cpu);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
- } else
+ } else {
thread_unblock_switch(td, mtx);
+ SDT_PROBE0(sched, , , remain_cpu);
+ }
/*
* Assert that all went well and return.
*/
@@ -2102,6 +2131,8 @@
{
struct tdq *tdq;
+ SDT_PROBE2(sched, , , surrender, td, td->td_proc);
+
thread_lock(td);
tdq = TDQ_SELF();
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
@@ -2330,6 +2361,8 @@
sched_tdname(curthread));
KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
KTR_ATTR_LINKED, sched_tdname(td));
+ SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL,
+ flags & SRQ_PREEMPTED);
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
* Recalculate the priority before we select the target cpu or
@@ -2375,6 +2408,7 @@
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "runq rem",
"prio:%d", td->td_priority);
+ SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL);
tdq = TDQ_CPU(td->td_sched->ts_cpu);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_bus.c
--- a/head/sys/kern/subr_bus.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_bus.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_bus.c 234152 2012-04-11 20:57:41Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_bus.c 235978 2012-05-25 07:32:26Z avg $");
#include "opt_bus.h"
@@ -1909,6 +1909,8 @@
PDEBUG(("%s at %s with order %u as unit %d",
name, DEVICENAME(dev), order, unit));
+ KASSERT(name != NULL || unit == -1,
+ ("child device with wildcard name and specific unit number"));
child = make_device(dev, name, unit);
if (child == NULL)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_devstat.c
--- a/head/sys/kern/subr_devstat.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_devstat.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_devstat.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_devstat.c 238372 2012-07-11 18:50:50Z kib $");
+
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -44,6 +46,58 @@
#include <machine/atomic.h>
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+
+dtrace_io_start_probe_func_t dtrace_io_start_probe;
+dtrace_io_done_probe_func_t dtrace_io_done_probe;
+dtrace_io_wait_start_probe_func_t dtrace_io_wait_start_probe;
+dtrace_io_wait_done_probe_func_t dtrace_io_wait_done_probe;
+
+uint32_t dtio_start_id;
+uint32_t dtio_done_id;
+uint32_t dtio_wait_start_id;
+uint32_t dtio_wait_done_id;
+
+#define DTRACE_DEVSTAT_START() \
+ if (dtrace_io_start_probe != NULL) \
+ (*dtrace_io_start_probe)(dtio_start_id, NULL, ds);
+
+#define DTRACE_DEVSTAT_BIO_START() \
+ if (dtrace_io_start_probe != NULL) \
+ (*dtrace_io_start_probe)(dtio_start_id, bp, ds);
+
+#define DTRACE_DEVSTAT_DONE() \
+ if (dtrace_io_done_probe != NULL) \
+ (*dtrace_io_done_probe)(dtio_done_id, NULL, ds);
+
+#define DTRACE_DEVSTAT_BIO_DONE() \
+ if (dtrace_io_done_probe != NULL) \
+ (*dtrace_io_done_probe)(dtio_done_id, bp, ds);
+
+#define DTRACE_DEVSTAT_WAIT_START() \
+ if (dtrace_io_wait_start_probe != NULL) \
+ (*dtrace_io_wait_start_probe)(dtio_wait_start_id, NULL, ds);
+
+#define DTRACE_DEVSTAT_WAIT_DONE() \
+ if (dtrace_io_wait_done_probe != NULL) \
+ (*dtrace_io_wait_done_probe)(dtio_wait_done_id, NULL, ds);
+
+#else /* ! KDTRACE_HOOKS */
+
+#define DTRACE_DEVSTAT_START()
+
+#define DTRACE_DEVSTAT_BIO_START()
+
+#define DTRACE_DEVSTAT_DONE()
+
+#define DTRACE_DEVSTAT_BIO_DONE()
+
+#define DTRACE_DEVSTAT_WAIT_START()
+
+#define DTRACE_DEVSTAT_WAIT_DONE()
+#endif /* KDTRACE_HOOKS */
+
static int devstat_num_devs;
static long devstat_generation = 1;
static int devstat_version = DEVSTAT_VERSION;
@@ -227,6 +281,7 @@
}
ds->start_count++;
atomic_add_rel_int(&ds->sequence0, 1);
+ DTRACE_DEVSTAT_START();
}
void
@@ -241,6 +296,7 @@
binuptime(&bp->bio_t0);
devstat_start_transaction(ds, &bp->bio_t0);
+ DTRACE_DEVSTAT_BIO_START();
}
/*
@@ -312,6 +368,7 @@
ds->end_count++;
atomic_add_rel_int(&ds->sequence0, 1);
+ DTRACE_DEVSTAT_DONE();
}
void
@@ -334,6 +391,7 @@
devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid,
DEVSTAT_TAG_SIMPLE, flg, NULL, &bp->bio_t0);
+ DTRACE_DEVSTAT_BIO_DONE();
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_dummy_vdso_tc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/kern/subr_dummy_vdso_tc.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,49 @@
+/*-
+ * Copyright 2012 Konstantin Belousov <kib at FreeBSD.ORG>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/kern/subr_dummy_vdso_tc.c 237433 2012-06-22 07:06:40Z kib $");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/vdso.h>
+
+uint32_t
+cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
+{
+
+ return (0);
+}
+
+#ifdef COMPAT_FREEBSD32
+uint32_t
+cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
+{
+
+ return (0);
+}
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_firmware.c
--- a/head/sys/kern/subr_firmware.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_firmware.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_firmware.c 234201 2012-04-13 04:22:42Z adrian $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_firmware.c 237546 2012-06-25 05:41:16Z kevlo $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -198,7 +198,7 @@
free(str, M_TEMP);
return NULL;
}
- bzero(frp, sizeof(frp)); /* start from a clean record */
+ bzero(frp, sizeof(*frp)); /* start from a clean record */
frp->fw.name = str;
frp->fw.data = data;
frp->fw.datasize = datasize;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_rman.c
--- a/head/sys/kern/subr_rman.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_rman.c Wed Jul 25 16:40:53 2012 +0300
@@ -58,7 +58,7 @@
#include "opt_ddb.h"
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_rman.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_rman.c 236359 2012-05-31 17:27:05Z imp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -161,6 +161,7 @@
rman_manage_region(struct rman *rm, u_long start, u_long end)
{
struct resource_i *r, *s, *t;
+ int rv = 0;
DPRINTF(("rman_manage_region: <%s> request: start %#lx, end %#lx\n",
rm->rm_descr, start, end));
@@ -188,13 +189,17 @@
TAILQ_INSERT_TAIL(&rm->rm_list, r, r_link);
} else {
/* Check for any overlap with the current region. */
- if (r->r_start <= s->r_end && r->r_end >= s->r_start)
- return EBUSY;
+ if (r->r_start <= s->r_end && r->r_end >= s->r_start) {
+ rv = EBUSY;
+ goto out;
+ }
/* Check for any overlap with the next region. */
t = TAILQ_NEXT(s, r_link);
- if (t && r->r_start <= t->r_end && r->r_end >= t->r_start)
- return EBUSY;
+ if (t && r->r_start <= t->r_end && r->r_end >= t->r_start) {
+ rv = EBUSY;
+ goto out;
+ }
/*
* See if this region can be merged with the next region. If
@@ -225,9 +230,9 @@
TAILQ_INSERT_BEFORE(s, r, r_link);
}
}
-
+out:
mtx_unlock(rm->rm_mtx);
- return 0;
+ return rv;
}
int
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_sleepqueue.c
--- a/head/sys/kern/subr_sleepqueue.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_sleepqueue.c Wed Jul 25 16:40:53 2012 +0300
@@ -60,10 +60,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 235459 2012-05-15 01:30:25Z rstone $");
#include "opt_sleepqueue_profiling.h"
#include "opt_ddb.h"
+#include "opt_kdtrace.h"
#include "opt_sched.h"
#include <sys/param.h>
@@ -75,6 +76,7 @@
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
@@ -166,6 +168,9 @@
static void sleepq_switch(void *wchan, int pri);
static void sleepq_timeout(void *arg);
+SDT_PROBE_DECLARE(sched, , , sleep);
+SDT_PROBE_DECLARE(sched, , , wakeup);
+
/*
* Early initialization of sleep queues that is called from the sleepinit()
* SYSINIT.
@@ -534,6 +539,7 @@
MPASS(td->td_sleepqueue == NULL);
sched_sleep(td, pri);
thread_lock_set(td, &sc->sc_lock);
+ SDT_PROBE0(sched, , , sleep);
TD_SET_SLEEPING(td);
mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
@@ -715,6 +721,8 @@
sc = SC_LOOKUP(sq->sq_wchan);
mtx_assert(&sc->sc_lock, MA_OWNED);
+ SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
+
/* Remove the thread from the queue. */
sq->sq_blockedcnt[td->td_sqqueue]--;
TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_smp.c
--- a/head/sys/kern/subr_smp.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_smp.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_smp.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_smp.c 236906 2012-06-11 18:47:26Z iwasaki $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -55,6 +55,7 @@
#ifdef SMP
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
+volatile cpuset_t suspended_cpus;
cpuset_t hlt_cpus_mask;
cpuset_t logical_cpus_mask;
@@ -207,9 +208,10 @@
#endif
static volatile u_int stopping_cpu = NOCPU;
int i;
+ volatile cpuset_t *cpus;
KASSERT(
-#if defined(__amd64__)
+#if defined(__amd64__) || defined(__i386__)
type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
type == IPI_STOP || type == IPI_STOP_HARD,
@@ -231,8 +233,15 @@
/* send the stop IPI to all CPUs in map */
ipi_selected(map, type);
+#if defined(__amd64__) || defined(__i386__)
+ if (type == IPI_SUSPEND)
+ cpus = &suspended_cpus;
+ else
+#endif
+ cpus = &stopped_cpus;
+
i = 0;
- while (!CPU_SUBSET(&stopped_cpus, &map)) {
+ while (!CPU_SUBSET(cpus, &map)) {
/* spin */
cpu_spinwait();
i++;
@@ -260,7 +269,7 @@
return (generic_stop_cpus(map, IPI_STOP_HARD));
}
-#if defined(__amd64__)
+#if defined(__amd64__) || defined(__i386__)
int
suspend_cpus(cpuset_t map)
{
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_syscall.c
--- a/head/sys/kern/subr_syscall.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_syscall.c Wed Jul 25 16:40:53 2012 +0300
@@ -42,7 +42,7 @@
#include "opt_ktrace.h"
#include "opt_kdtrace.h"
-__FBSDID("$FreeBSD: head/sys/kern/subr_syscall.c 234172 2012-04-12 10:48:43Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_syscall.c 236309 2012-05-30 13:44:42Z kib $");
#include <sys/capability.h>
#include <sys/ktr.h>
@@ -182,6 +182,12 @@
KASSERT(td->td_locks == 0,
("System call %s returning with %d locks held",
syscallname(p, sa->code), td->td_locks));
+ KASSERT((td->td_pflags & TDP_NOFAULTING) == 0,
+ ("System call %s returning with pagefaults disabled",
+ syscallname(p, sa->code)));
+ KASSERT((td->td_pflags & TDP_NOSLEEPING) == 0,
+ ("System call %s returning with sleep disabled",
+ syscallname(p, sa->code)));
/*
* Handle reschedule and other end-of-syscall issues
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_trap.c
--- a/head/sys/kern/subr_trap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_trap.c Wed Jul 25 16:40:53 2012 +0300
@@ -42,9 +42,8 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_trap.c 234494 2012-04-20 15:32:36Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_trap.c 236859 2012-06-10 20:24:01Z pjd $");
-#include "opt_capsicum.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
#include "opt_kdtrace.h"
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_turnstile.c
--- a/head/sys/kern/subr_turnstile.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_turnstile.c Wed Jul 25 16:40:53 2012 +0300
@@ -57,9 +57,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_turnstile.c 234303 2012-04-14 23:59:58Z davide $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_turnstile.c 235459 2012-05-15 01:30:25Z rstone $");
#include "opt_ddb.h"
+#include "opt_kdtrace.h"
#include "opt_turnstile_profiling.h"
#include "opt_sched.h"
@@ -73,6 +74,7 @@
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
@@ -167,6 +169,11 @@
static int turnstile_init(void *mem, int size, int flags);
static void turnstile_fini(void *mem, int size);
+SDT_PROVIDER_DECLARE(sched);
+SDT_PROBE_DEFINE(sched, , , sleep, sleep);
+SDT_PROBE_DEFINE2(sched, , , wakeup, wakeup, "struct thread *",
+ "struct proc *");
+
/*
* Walks the chain of turnstiles and their owners to propagate the priority
* of the thread being blocked to all the threads holding locks that have to
@@ -740,6 +747,8 @@
CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__,
td->td_tid, lock, lock->lo_name);
+ SDT_PROBE0(sched, , , sleep);
+
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
mi_switch(SW_VOL | SWT_TURNSTILE, NULL);
@@ -916,6 +925,7 @@
while (!TAILQ_EMPTY(&pending_threads)) {
td = TAILQ_FIRST(&pending_threads);
TAILQ_REMOVE(&pending_threads, td, td_lockq);
+ SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
thread_lock(td);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
MPASS(td->td_proc->p_magic == P_MAGIC);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/subr_witness.c
--- a/head/sys/kern/subr_witness.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/subr_witness.c Wed Jul 25 16:40:53 2012 +0300
@@ -85,7 +85,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 233937 2012-04-06 06:53:58Z melifaro $");
+__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 237623 2012-06-27 03:45:25Z alc $");
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
@@ -564,7 +564,7 @@
*/
{ "bpf global lock", &lock_class_mtx_sleep },
{ "bpf interface lock", &lock_class_rw },
- { "bpf cdev lock", &lock_class_rw },
+ { "bpf cdev lock", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* NFS server
@@ -593,19 +593,22 @@
/*
* CDEV
*/
- { "system map", &lock_class_mtx_sleep },
- { "vm page queue mutex", &lock_class_mtx_sleep },
+ { "vm map (system)", &lock_class_mtx_sleep },
+ { "vm page queue", &lock_class_mtx_sleep },
{ "vnode interlock", &lock_class_mtx_sleep },
{ "cdev", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* VM
- *
*/
+ { "vm map (user)", &lock_class_sx },
{ "vm object", &lock_class_mtx_sleep },
- { "page lock", &lock_class_mtx_sleep },
- { "vm page queue mutex", &lock_class_mtx_sleep },
+ { "vm page", &lock_class_mtx_sleep },
+ { "vm page queue", &lock_class_mtx_sleep },
+ { "pmap pv global", &lock_class_rw },
{ "pmap", &lock_class_mtx_sleep },
+ { "pmap pv list", &lock_class_rw },
+ { "vm page free queue", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* kqueue/VFS interaction
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sys_capability.c
--- a/head/sys/kern/sys_capability.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sys_capability.c Wed Jul 25 16:40:53 2012 +0300
@@ -51,12 +51,12 @@
* anonymous, rather than named, POSIX shared memory objects.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/kern/sys_capability.c 236858 2012-06-10 20:22:10Z pjd $");
+
#include "opt_capsicum.h"
#include "opt_ktrace.h"
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sys_capability.c 232860 2012-03-12 11:56:57Z pho $");
-
#include <sys/param.h>
#include <sys/capability.h>
#include <sys/file.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sys_generic.c
--- a/head/sys/kern/sys_generic.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sys_generic.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sys_generic.c 232494 2012-03-04 14:55:37Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/sys_generic.c 237195 2012-06-17 13:03:50Z davide $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -1255,7 +1255,7 @@
struct pollfd *bits;
struct pollfd smallbits[32];
struct timeval atv, rtv, ttv;
- int error = 0, timo;
+ int error, timo;
u_int nfds;
size_t ni;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sys_procdesc.c
--- a/head/sys/kern/sys_procdesc.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sys_procdesc.c Wed Jul 25 16:40:53 2012 +0300
@@ -59,7 +59,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sys_procdesc.c 225617 2011-09-16 13:58:51Z kmacy $");
+__FBSDID("$FreeBSD: head/sys/kern/sys_procdesc.c 237277 2012-06-19 22:23:59Z pjd $");
#include "opt_procdesc.h"
@@ -338,7 +338,7 @@
/*
* procdesc_close() - last close on a process descriptor. If the process is
- * still running, terminate with SIGKILL (unless PD_DAEMON is set) and let
+ * still running, terminate with SIGKILL (unless PDF_DAEMON is set) and let
* init(8) clean up the mess; if not, we have to clean up the zombie ourselves.
*/
static int
@@ -386,7 +386,7 @@
*/
p->p_sigparent = SIGCHLD;
proc_reparent(p, initproc);
- if ((pd->pd_flags & PD_DAEMON) == 0)
+ if ((pd->pd_flags & PDF_DAEMON) == 0)
kern_psignal(p, SIGKILL);
PROC_UNLOCK(p);
sx_xunlock(&proctree_lock);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/sys_process.c
--- a/head/sys/kern/sys_process.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/sys_process.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/sys_process.c 232048 2012-02-23 11:50:23Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/sys_process.c 238287 2012-07-09 09:24:46Z davidxu $");
#include "opt_compat.h"
@@ -635,7 +635,7 @@
struct iovec iov;
struct uio uio;
struct proc *curp, *p, *pp;
- struct thread *td2 = NULL;
+ struct thread *td2 = NULL, *td3;
struct ptrace_io_desc *piod = NULL;
struct ptrace_lwpinfo *pl;
int error, write, tmp, num;
@@ -953,10 +953,8 @@
td2->td_xsig = data;
if (req == PT_DETACH) {
- struct thread *td3;
- FOREACH_THREAD_IN_PROC(p, td3) {
+ FOREACH_THREAD_IN_PROC(p, td3)
td3->td_dbgflags &= ~TDB_SUSPEND;
- }
}
/*
* unsuspend all threads, to not let a thread run,
@@ -967,6 +965,8 @@
p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED);
thread_unsuspend(p);
PROC_SUNLOCK(p);
+ if (req == PT_ATTACH)
+ kern_psignal(p, data);
} else {
if (data)
kern_psignal(p, data);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/syscalls.c
--- a/head/sys/kern/syscalls.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/syscalls.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,8 +2,8 @@
* System call names.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/kern/syscalls.c 227776 2011-11-21 01:26:10Z lstewart $
- * created from FreeBSD: head/sys/kern/syscalls.master 227691 2011-11-19 06:35:15Z ed
+ * $FreeBSD: head/sys/kern/syscalls.c 236027 2012-05-25 21:52:57Z ed $
+ * created from FreeBSD: head/sys/kern/syscalls.master 236026 2012-05-25 21:50:48Z ed
*/
const char *syscallnames[] = {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/syscalls.master
--- a/head/sys/kern/syscalls.master Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/syscalls.master Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
- $FreeBSD: head/sys/kern/syscalls.master 227776 2011-11-21 01:26:10Z lstewart $
+ $FreeBSD: head/sys/kern/syscalls.master 236026 2012-05-25 21:50:48Z ed $
; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94
;
; System call name/number master file.
@@ -916,9 +916,9 @@
512 AUE_SHMCTL NOSTD { int shmctl(int shmid, int cmd, \
struct shmid_ds *buf); }
513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); }
-514 AUE_CAP_NEW STD { int cap_new(int fd, u_int64_t rights); }
+514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); }
515 AUE_CAP_GETRIGHTS STD { int cap_getrights(int fd, \
- u_int64_t *rightsp); }
+ uint64_t *rightsp); }
516 AUE_CAP_ENTER STD { int cap_enter(void); }
517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); }
518 AUE_PDFORK STD { int pdfork(int *fdp, int flags); }
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/systrace_args.c
--- a/head/sys/kern/systrace_args.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/systrace_args.c Wed Jul 25 16:40:53 2012 +0300
@@ -2,7 +2,7 @@
* System call argument to DTrace register array converstion.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: head/sys/kern/systrace_args.c 227776 2011-11-21 01:26:10Z lstewart $
+ * $FreeBSD: head/sys/kern/systrace_args.c 236027 2012-05-25 21:52:57Z ed $
* This file is part of the DTrace syscall provider.
*/
@@ -3121,7 +3121,7 @@
case 514: {
struct cap_new_args *p = params;
iarg[0] = p->fd; /* int */
- uarg[1] = p->rights; /* u_int64_t */
+ uarg[1] = p->rights; /* uint64_t */
*n_args = 2;
break;
}
@@ -3129,7 +3129,7 @@
case 515: {
struct cap_getrights_args *p = params;
iarg[0] = p->fd; /* int */
- uarg[1] = (intptr_t) p->rightsp; /* u_int64_t * */
+ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */
*n_args = 2;
break;
}
@@ -8434,7 +8434,7 @@
p = "int";
break;
case 1:
- p = "u_int64_t";
+ p = "uint64_t";
break;
default:
break;
@@ -8447,7 +8447,7 @@
p = "int";
break;
case 1:
- p = "u_int64_t *";
+ p = "uint64_t *";
break;
default:
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/tty.c
--- a/head/sys/kern/tty.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/tty.c Wed Jul 25 16:40:53 2012 +0300
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/tty.c 232197 2012-02-26 20:56:49Z phk $");
+__FBSDID("$FreeBSD: head/sys/kern/tty.c 237219 2012-06-18 07:34:38Z pho $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -219,9 +219,15 @@
static int
ttydev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
- struct tty *tp = dev->si_drv1;
+ struct tty *tp;
int error = 0;
+ while ((tp = dev->si_drv1) == NULL) {
+ error = tsleep(&dev->si_drv1, PCATCH, "ttdrv1", 1);
+ if (error != EWOULDBLOCK)
+ return (error);
+ }
+
tty_lock(tp);
if (tty_gone(tp)) {
/* Device is already gone. */
@@ -738,9 +744,14 @@
static int
ttyil_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
- struct tty *tp = dev->si_drv1;
+ struct tty *tp;
int error = 0;
+ while ((tp = dev->si_drv1) == NULL) {
+ error = tsleep(&dev->si_drv1, PCATCH, "ttdrv1", 1);
+ if (error != EWOULDBLOCK)
+ return (error);
+ }
tty_lock(tp);
if (tty_gone(tp))
error = ENODEV;
@@ -1203,6 +1214,7 @@
dev = make_dev_cred(&ttydev_cdevsw, 0, cred,
uid, gid, mode, "%s%s", prefix, name);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
tp->t_dev = dev;
/* Slave call-in devices. */
@@ -1211,12 +1223,14 @@
uid, gid, mode, "%s%s.init", prefix, name);
dev_depends(tp->t_dev, dev);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
dev->si_drv2 = &tp->t_termios_init_in;
dev = make_dev_cred(&ttyil_cdevsw, TTYUNIT_LOCK, cred,
uid, gid, mode, "%s%s.lock", prefix, name);
dev_depends(tp->t_dev, dev);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
dev->si_drv2 = &tp->t_termios_lock_in;
}
@@ -1226,6 +1240,7 @@
UID_UUCP, GID_DIALER, 0660, "cua%s", name);
dev_depends(tp->t_dev, dev);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
/* Slave call-out devices. */
if (tp->t_flags & TF_INITLOCK) {
@@ -1234,6 +1249,7 @@
UID_UUCP, GID_DIALER, 0660, "cua%s.init", name);
dev_depends(tp->t_dev, dev);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
dev->si_drv2 = &tp->t_termios_init_out;
dev = make_dev_cred(&ttyil_cdevsw,
@@ -1241,6 +1257,7 @@
UID_UUCP, GID_DIALER, 0660, "cua%s.lock", name);
dev_depends(tp->t_dev, dev);
dev->si_drv1 = tp;
+ wakeup(&dev->si_drv1);
dev->si_drv2 = &tp->t_termios_lock_out;
}
}
@@ -1817,9 +1834,6 @@
{
struct tty *tp;
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_cap;
-#endif
struct cdev *dev;
struct cdevsw *cdp;
struct filedesc *fdp;
@@ -1838,10 +1852,9 @@
}
#ifdef CAPABILITIES
- fp_cap = fp;
- error = cap_funwrap(fp_cap, CAP_TTYHOOK, &fp);
+ error = cap_funwrap(fp, CAP_TTYHOOK, &fp);
if (error)
- return (error);
+ goto done1;
#endif
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/uipc_mqueue.c
--- a/head/sys/kern/uipc_mqueue.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/uipc_mqueue.c Wed Jul 25 16:40:53 2012 +0300
@@ -43,7 +43,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/uipc_mqueue.c 229272 2012-01-02 12:12:10Z ed $");
+__FBSDID("$FreeBSD: head/sys/kern/uipc_mqueue.c 234607 2012-04-23 14:10:34Z trasz $");
#include "opt_compat.h"
@@ -703,7 +703,7 @@
{
struct vnode *vp = (struct vnode *)context;
- vrecycle(vp, curthread);
+ vrecycle(vp);
vdrop(vp);
}
@@ -1065,7 +1065,7 @@
struct mqfs_node *pn = VTON(ap->a_vp);
if (pn->mn_deleted)
- vrecycle(ap->a_vp, ap->a_td);
+ vrecycle(ap->a_vp);
return (0);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/uipc_socket.c
--- a/head/sys/kern/uipc_socket.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/uipc_socket.c Wed Jul 25 16:40:53 2012 +0300
@@ -101,7 +101,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 233850 2012-04-03 18:38:00Z np $");
+__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 238085 2012-07-03 19:08:02Z trociny $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -635,7 +635,7 @@
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/uipc_syscalls.c
--- a/head/sys/kern/uipc_syscalls.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/uipc_syscalls.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 233004 2012-03-15 14:13:38Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 236891 2012-06-11 16:08:03Z pjd $");
#include "opt_capsicum.h"
#include "opt_inet.h"
@@ -134,8 +134,7 @@
int error;
#endif
- fp = NULL;
- if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
+ if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
return (EBADF);
#ifdef CAPABILITIES
/*
@@ -179,7 +178,6 @@
int protocol;
} */ *uap;
{
- struct filedesc *fdp;
struct socket *so;
struct file *fp;
int fd, error;
@@ -191,7 +189,6 @@
if (error)
return (error);
#endif
- fdp = td->td_proc->p_fd;
error = falloc(td, &fp, &fd, 0);
if (error)
return (error);
@@ -199,7 +196,7 @@
error = socreate(uap->domain, &so, uap->type, uap->protocol,
td->td_ucred, td);
if (error) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td->td_proc->p_fd, fp, fd, td);
} else {
finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
td->td_retval[0] = fd;
@@ -1962,6 +1959,7 @@
* and takes care of the overall progress.
*/
for (off = uap->offset, rem = uap->nbytes; ; ) {
+ struct mbuf *mtail = NULL;
int loopbytes = 0;
int space = 0;
int done = 0;
@@ -2181,10 +2179,13 @@
m0->m_len = xfsize;
/* Append to mbuf chain. */
- if (m != NULL)
- m_cat(m, m0);
+ if (mtail != NULL)
+ mtail->m_next = m0;
+ else if (m != NULL)
+ m_last(m)->m_next = m0;
else
m = m0;
+ mtail = m0;
/* Keep track of bits processed. */
loopbytes += xfsize;
@@ -2309,25 +2310,23 @@
} */ *uap;
{
#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct filedesc *fdp;
struct file *nfp = NULL;
int error;
struct socket *head, *so;
int fd;
u_int fflag;
- fdp = td->td_proc->p_fd;
AUDIT_ARG_FD(uap->sd);
error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
if (error)
goto done2;
if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
error = EOPNOTSUPP;
- goto done2;
+ goto done;
}
error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
if (error)
- goto done2;
+ goto done;
/*
* At this point we know we do have a assoc to pull
* we proceed to get the fd setup. This may block
@@ -2374,7 +2373,7 @@
* out from under us.
*/
if (error)
- fdclose(fdp, nfp, fd, td);
+ fdclose(td->td_proc->p_fd, nfp, fd, td);
/*
* Release explicitly held references before returning.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/uipc_usrreq.c
--- a/head/sys/kern/uipc_usrreq.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/uipc_usrreq.c Wed Jul 25 16:40:53 2012 +0300
@@ -57,7 +57,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 232317 2012-02-29 21:38:31Z trociny $");
+__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 237036 2012-06-13 22:12:10Z pjd $");
#include "opt_ddb.h"
@@ -1872,7 +1872,7 @@
FILEDESC_SLOCK(fdescp);
for (i = 0; i < oldfds; i++) {
fd = *fdp++;
- if ((unsigned)fd >= fdescp->fd_nfiles ||
+ if (fd < 0 || fd >= fdescp->fd_nfiles ||
fdescp->fd_ofiles[fd] == NULL) {
FILEDESC_SUNLOCK(fdescp);
error = EBADF;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/vfs_bio.c
--- a/head/sys/kern/vfs_bio.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/vfs_bio.c Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/vfs_bio.c 232351 2012-03-01 18:45:25Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/kern/vfs_bio.c 236487 2012-06-02 19:39:12Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -2640,8 +2640,8 @@
if (bp != NULL) {
int lockflags;
/*
- * Buffer is in-core. If the buffer is not busy, it must
- * be on a queue.
+ * Buffer is in-core. If the buffer is not busy nor managed,
+ * it must be on a queue.
*/
lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK;
@@ -2671,9 +2671,13 @@
bp->b_flags &= ~B_CACHE;
else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0)
bp->b_flags |= B_CACHE;
- BO_LOCK(bo);
- bremfree(bp);
- BO_UNLOCK(bo);
+ if (bp->b_flags & B_MANAGED)
+ MPASS(bp->b_qindex == QUEUE_NONE);
+ else {
+ BO_LOCK(bo);
+ bremfree(bp);
+ BO_UNLOCK(bo);
+ }
/*
* check for size inconsistancies for non-VMIO case.
@@ -3991,7 +3995,9 @@
}
db_printf("buf at %p\n", bp);
- db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS);
+ db_printf("b_flags = 0x%b, b_xflags=0x%b, b_vflags=0x%b\n",
+ (u_int)bp->b_flags, PRINT_BUF_FLAGS, (u_int)bp->b_xflags,
+ PRINT_BUF_XFLAGS, (u_int)bp->b_vflags, PRINT_BUF_VFLAGS);
db_printf(
"b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n"
"b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, "
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/vfs_default.c
--- a/head/sys/kern/vfs_default.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/vfs_default.c Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 234386 2012-04-17 16:28:22Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 236825 2012-06-09 22:26:53Z mckusick $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -343,8 +343,8 @@
if (error)
goto out;
- if ((dp->d_type != DT_WHT) &&
- !strcmp(dp->d_name, dirname)) {
+ if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
+ strcmp(dp->d_name, dirname) == 0) {
found = 1;
goto out;
}
@@ -646,8 +646,17 @@
if ((bp->b_vflags & BV_SCANNED) != 0)
continue;
bp->b_vflags |= BV_SCANNED;
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
- continue;
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
+ if (ap->a_waitfor != MNT_WAIT)
+ continue;
+ if (BUF_LOCK(bp,
+ LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
+ BO_MTX(bo)) != 0) {
+ BO_LOCK(bo);
+ goto loop1;
+ }
+ BO_LOCK(bo);
+ }
BO_UNLOCK(bo);
KASSERT(bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p",
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/vfs_subr.c
--- a/head/sys/kern/vfs_subr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/vfs_subr.c Wed Jul 25 16:40:53 2012 +0300
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 234483 2012-04-20 07:00:28Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 236503 2012-06-03 08:01:12Z avg $");
#include "opt_ddb.h"
#include "opt_watchdog.h"
@@ -73,9 +73,7 @@
#include <sys/syslog.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
-#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
-#endif
#include <machine/stdarg.h>
@@ -1027,6 +1025,7 @@
if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
vp->v_vflag |= VV_NOKNOTE;
}
+ rangelock_init(&vp->v_rl);
*vpp = vp;
return (0);
@@ -1327,8 +1326,7 @@
* sync activity.
*/
int
-vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
- off_t length, int blksize)
+vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
{
struct buf *bp, *nbp;
int anyfreed;
@@ -1869,10 +1867,10 @@
LIST_INSERT_HEAD(next, bo, bo_synclist);
continue;
}
-#ifdef SW_WATCHDOG
+
if (first_printf == 0)
wdog_kern_pat(WD_LASTVAL);
-#endif
+
}
if (!LIST_EMPTY(gslp)) {
mtx_unlock(&sync_mtx);
@@ -2469,6 +2467,7 @@
/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
vp->v_op = NULL;
#endif
+ rangelock_destroy(&vp->v_rl);
lockdestroy(vp->v_vnlock);
mtx_destroy(&vp->v_interlock);
mtx_destroy(BO_MTX(bo));
@@ -2660,7 +2659,7 @@
* Recycle an unused vnode to the front of the free list.
*/
int
-vrecycle(struct vnode *vp, struct thread *td)
+vrecycle(struct vnode *vp)
{
int recycled;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/vfs_syscalls.c
--- a/head/sys/kern/vfs_syscalls.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/vfs_syscalls.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 234489 2012-04-20 10:08:30Z jh $");
+__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 238029 2012-07-02 21:01:03Z kib $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -1093,8 +1093,7 @@
struct file *fp;
struct vnode *vp;
int cmode;
- struct file *nfp;
- int type, indx = -1, error, error_open;
+ int type, indx = -1, error;
struct flock lf;
struct nameidata nd;
int vfslocked;
@@ -1111,19 +1110,22 @@
if (flags & O_EXEC) {
if (flags & O_ACCMODE)
return (EINVAL);
- } else if ((flags & O_ACCMODE) == O_ACCMODE)
+ } else if ((flags & O_ACCMODE) == O_ACCMODE) {
return (EINVAL);
- else
+ } else {
flags = FFLAGS(flags);
+ }
/*
- * allocate the file descriptor, but don't install a descriptor yet
+ * Allocate the file descriptor, but don't install a descriptor yet.
*/
- error = falloc_noinstall(td, &nfp);
+ error = falloc_noinstall(td, &fp);
if (error)
return (error);
- /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
- fp = nfp;
+ /*
+ * An extra reference on `fp' has been held for us by
+ * falloc_noinstall().
+ */
/* Set the flags early so the finit in devfs can pick them up. */
fp->f_flag = flags & FMASK;
cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
@@ -1141,36 +1143,24 @@
goto success;
/*
- * handle special fdopen() case. bleh. dupfdopen() is
- * responsible for dropping the old contents of ofiles[indx]
- * if it succeeds.
+ * Handle special fdopen() case. bleh.
*
* Don't do this for relative (capability) lookups; we don't
* understand exactly what would happen, and we don't think
* that it ever should.
*/
- if ((nd.ni_strictrelative == 0) &&
+ if (nd.ni_strictrelative == 0 &&
(error == ENODEV || error == ENXIO) &&
- (td->td_dupfd >= 0)) {
- /* XXX from fdopen */
- error_open = error;
- if ((error = finstall(td, fp, &indx, flags)) != 0)
- goto bad_unlocked;
- if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
- flags, error_open)) == 0)
+ td->td_dupfd >= 0) {
+ error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
+ &indx);
+ if (error == 0)
goto success;
}
- /*
- * Clean up the descriptor, but only if another thread hadn't
- * replaced or closed it.
- */
- if (indx != -1)
- fdclose(fdp, fp, indx, td);
- fdrop(fp, td);
if (error == ERESTART)
error = EINTR;
- return (error);
+ goto bad_unlocked;
}
td->td_dupfd = 0;
vfslocked = NDHASGIANT(&nd);
@@ -1206,7 +1196,7 @@
if ((flags & FNONBLOCK) == 0)
type |= F_WAIT;
if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
- type)) != 0)
+ type)) != 0)
goto bad;
atomic_set_int(&fp->f_flag, FHASLOCK);
}
@@ -1247,10 +1237,8 @@
bad:
VFS_UNLOCK_GIANT(vfslocked);
bad_unlocked:
- if (indx != -1)
- fdclose(fdp, fp, indx, td);
+ KASSERT(indx == -1, ("indx=%d, should be -1", indx));
fdrop(fp, td);
- td->td_retval[0] = -1;
return (error);
}
@@ -1993,7 +1981,7 @@
struct file *fp;
struct vnode *vp;
struct vattr vattr;
- off_t offset, size;
+ off_t foffset, offset, size;
int error, noneg;
int vfslocked;
@@ -2005,18 +1993,19 @@
return (ESPIPE);
}
vp = fp->f_vnode;
+ foffset = foffset_lock(fp, 0);
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
noneg = (vp->v_type != VCHR);
offset = uap->offset;
switch (uap->whence) {
case L_INCR:
if (noneg &&
- (fp->f_offset < 0 ||
- (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
+ (foffset < 0 ||
+ (offset > 0 && foffset > OFF_MAX - offset))) {
error = EOVERFLOW;
break;
}
- offset += fp->f_offset;
+ offset += foffset;
break;
case L_XTND:
vn_lock(vp, LK_SHARED | LK_RETRY);
@@ -2056,12 +2045,12 @@
error = EINVAL;
if (error != 0)
goto drop;
- fp->f_offset = offset;
VFS_KNOTE_UNLOCKED(vp, 0);
- *(off_t *)(td->td_retval) = fp->f_offset;
+ *(off_t *)(td->td_retval) = offset;
drop:
fdrop(fp, td);
VFS_UNLOCK_GIANT(vfslocked);
+ foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
return (error);
}
@@ -3994,6 +3983,7 @@
caddr_t dirbuf;
int error, eofflag, readcnt, vfslocked;
long loff;
+ off_t foffset;
/* XXX arbitrary sanity limit on `count'. */
if (uap->count > 64 * 1024)
@@ -4006,10 +3996,12 @@
return (EBADF);
}
vp = fp->f_vnode;
+ foffset = foffset_lock(fp, 0);
unionread:
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
if (vp->v_type != VDIR) {
VFS_UNLOCK_GIANT(vfslocked);
+ foffset_unlock(fp, foffset, 0);
fdrop(fp, td);
return (EINVAL);
}
@@ -4022,12 +4014,13 @@
auio.uio_td = td;
auio.uio_resid = uap->count;
vn_lock(vp, LK_SHARED | LK_RETRY);
- loff = auio.uio_offset = fp->f_offset;
+ loff = auio.uio_offset = foffset;
#ifdef MAC
error = mac_vnode_check_readdir(td->td_ucred, vp);
if (error) {
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
+ foffset_unlock(fp, foffset, FOF_NOUPDATE);
fdrop(fp, td);
return (error);
}
@@ -4036,7 +4029,7 @@
if (vp->v_mount->mnt_maxsymlinklen <= 0) {
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
NULL, NULL);
- fp->f_offset = auio.uio_offset;
+ foffset = auio.uio_offset;
} else
# endif
{
@@ -4048,7 +4041,7 @@
kiov.iov_base = dirbuf;
error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
NULL, NULL);
- fp->f_offset = kuio.uio_offset;
+ foffset = kuio.uio_offset;
if (error == 0) {
readcnt = uap->count - kuio.uio_resid;
edp = (struct dirent *)&dirbuf[readcnt];
@@ -4086,6 +4079,7 @@
if (error) {
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
+ foffset_unlock(fp, foffset, 0);
fdrop(fp, td);
return (error);
}
@@ -4097,13 +4091,14 @@
VREF(vp);
fp->f_vnode = vp;
fp->f_data = vp;
- fp->f_offset = 0;
+ foffset = 0;
vput(tvp);
VFS_UNLOCK_GIANT(vfslocked);
goto unionread;
}
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
+ foffset_unlock(fp, foffset, 0);
fdrop(fp, td);
td->td_retval[0] = uap->count - auio.uio_resid;
if (error == 0)
@@ -4136,7 +4131,8 @@
long base;
int error;
- error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
+ error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
+ NULL, UIO_USERSPACE);
if (error)
return (error);
if (uap->basep != NULL)
@@ -4146,7 +4142,7 @@
int
kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
- long *basep)
+ long *basep, ssize_t *residp, enum uio_seg bufseg)
{
struct vnode *vp;
struct file *fp;
@@ -4155,6 +4151,7 @@
int vfslocked;
long loff;
int error, eofflag;
+ off_t foffset;
AUDIT_ARG_FD(fd);
if (count > IOSIZE_MAX)
@@ -4168,6 +4165,7 @@
return (EBADF);
}
vp = fp->f_vnode;
+ foffset = foffset_lock(fp, 0);
unionread:
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
if (vp->v_type != VDIR) {
@@ -4180,18 +4178,18 @@
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
- auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_segflg = bufseg;
auio.uio_td = td;
vn_lock(vp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
- loff = auio.uio_offset = fp->f_offset;
+ loff = auio.uio_offset = foffset;
#ifdef MAC
error = mac_vnode_check_readdir(td->td_ucred, vp);
if (error == 0)
#endif
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
NULL);
- fp->f_offset = auio.uio_offset;
+ foffset = auio.uio_offset;
if (error) {
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
@@ -4205,7 +4203,7 @@
VREF(vp);
fp->f_vnode = vp;
fp->f_data = vp;
- fp->f_offset = 0;
+ foffset = 0;
vput(tvp);
VFS_UNLOCK_GIANT(vfslocked);
goto unionread;
@@ -4213,8 +4211,11 @@
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
*basep = loff;
+ if (residp != NULL)
+ *residp = auio.uio_resid;
td->td_retval[0] = count - auio.uio_resid;
fail:
+ foffset_unlock(fp, foffset, 0);
fdrop(fp, td);
return (error);
}
@@ -4334,12 +4335,10 @@
struct file *fp;
#ifdef CAPABILITIES
struct file *fp_fromcap;
+ int error;
#endif
- int error;
-
- error = 0;
- fp = NULL;
- if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
+
+ if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
return (EBADF);
#ifdef CAPABILITIES
/*
@@ -4481,24 +4480,19 @@
int flags;
} */ *uap;
{
- struct proc *p = td->td_proc;
struct mount *mp;
struct vnode *vp;
struct fhandle fhp;
- struct vattr vat;
- struct vattr *vap = &vat;
struct flock lf;
struct file *fp;
- register struct filedesc *fdp = p->p_fd;
int fmode, error, type;
- accmode_t accmode;
- struct file *nfp;
int vfslocked;
int indx;
error = priv_check(td, PRIV_VFS_FHOPEN);
if (error)
return (error);
+ indx = -1;
fmode = FFLAGS(uap->flags);
/* why not allow a non-read/write open for our lockd? */
if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
@@ -4514,109 +4508,42 @@
/* now give me my vnode, it gets returned to me locked */
error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
vfs_unbusy(mp);
- if (error)
- goto out;
+ if (error) {
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+ }
+
+ error = falloc_noinstall(td, &fp);
+ if (error) {
+ vput(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+ }
/*
- * from now on we have to make sure not
- * to forget about the vnode
- * any error that causes an abort must vput(vp)
- * just set error = err and 'goto bad;'.
+ * An extra reference on `fp' has been held for us by
+ * falloc_noinstall().
*/
- /*
- * from vn_open
- */
- if (vp->v_type == VLNK) {
- error = EMLINK;
+#ifdef INVARIANTS
+ td->td_dupfd = -1;
+#endif
+ error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
+ if (error) {
+ KASSERT(fp->f_ops == &badfileops,
+ ("VOP_OPEN in fhopen() set f_ops"));
+ KASSERT(td->td_dupfd < 0,
+ ("fhopen() encountered fdopen()"));
+
+ vput(vp);
goto bad;
}
- if (vp->v_type == VSOCK) {
- error = EOPNOTSUPP;
- goto bad;
- }
- if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
- error = ENOTDIR;
- goto bad;
- }
- accmode = 0;
- if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR) {
- error = EISDIR;
- goto bad;
- }
- error = vn_writechk(vp);
- if (error)
- goto bad;
- accmode |= VWRITE;
- }
- if (fmode & FREAD)
- accmode |= VREAD;
- if ((fmode & O_APPEND) && (fmode & FWRITE))
- accmode |= VAPPEND;
-#ifdef MAC
- error = mac_vnode_check_open(td->td_ucred, vp, accmode);
- if (error)
- goto bad;
+#ifdef INVARIANTS
+ td->td_dupfd = 0;
#endif
- if (accmode) {
- error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
- if (error)
- goto bad;
- }
- if (fmode & O_TRUNC) {
- vfs_ref(mp);
- VOP_UNLOCK(vp, 0); /* XXX */
- if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
- vrele(vp);
- vfs_rel(mp);
- goto out;
- }
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
- vfs_rel(mp);
-#ifdef MAC
- /*
- * We don't yet have fp->f_cred, so use td->td_ucred, which
- * should be right.
- */
- error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
- if (error == 0) {
-#endif
- VATTR_NULL(vap);
- vap->va_size = 0;
- error = VOP_SETATTR(vp, vap, td->td_ucred);
-#ifdef MAC
- }
-#endif
- vn_finished_write(mp);
- if (error)
- goto bad;
- }
- error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
- if (error)
- goto bad;
-
- if (fmode & FWRITE) {
- vp->v_writecount++;
- CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
- __func__, vp, vp->v_writecount);
- }
-
- /*
- * end of vn_open code
- */
-
- if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
- if (fmode & FWRITE) {
- vp->v_writecount--;
- CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
- __func__, vp, vp->v_writecount);
- }
- goto bad;
- }
- /* An extra reference on `nfp' has been held for us by falloc(). */
- fp = nfp;
- nfp->f_vnode = vp;
- finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
+ fp->f_vnode = vp;
+ fp->f_seqcount = 1;
+ finit(fp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
+ VOP_UNLOCK(vp, 0);
if (fmode & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
@@ -4628,36 +4555,22 @@
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
- VOP_UNLOCK(vp, 0);
if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
- type)) != 0) {
- /*
- * The lock request failed. Normally close the
- * descriptor but handle the case where someone might
- * have dup()d or close()d it when we weren't looking.
- */
- fdclose(fdp, fp, indx, td);
-
- /*
- * release our private reference
- */
- fdrop(fp, td);
- goto out;
- }
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ type)) != 0)
+ goto bad;
atomic_set_int(&fp->f_flag, FHASLOCK);
}
-
- VOP_UNLOCK(vp, 0);
+ if (fmode & O_TRUNC) {
+ error = fo_truncate(fp, 0, td->td_ucred, td);
+ if (error)
+ goto bad;
+ }
+
+ error = finstall(td, fp, &indx, fmode);
+bad:
+ VFS_UNLOCK_GIANT(vfslocked);
fdrop(fp, td);
- VFS_UNLOCK_GIANT(vfslocked);
td->td_retval[0] = indx;
- return (0);
-
-bad:
- vput(vp);
-out:
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -4679,7 +4592,22 @@
} */ *uap;
{
struct stat sb;
- fhandle_t fh;
+ struct fhandle fh;
+ int error;
+
+ error = copyin(uap->u_fhp, &fh, sizeof(fh));
+ if (error != 0)
+ return (error);
+ error = kern_fhstat(td, fh, &sb);
+ if (error != 0)
+ return (error);
+ error = copyout(&sb, uap->sb, sizeof(sb));
+ return (error);
+}
+
+int
+kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
+{
struct mount *mp;
struct vnode *vp;
int vfslocked;
@@ -4688,9 +4616,6 @@
error = priv_check(td, PRIV_VFS_FHSTAT);
if (error)
return (error);
- error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
- if (error)
- return (error);
if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
return (ESTALE);
vfslocked = VFS_LOCK_GIANT(mp);
@@ -4700,12 +4625,9 @@
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
- error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
+ error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
vput(vp);
VFS_UNLOCK_GIANT(vfslocked);
- if (error)
- return (error);
- error = copyout(&sb, uap->sb, sizeof(sb));
return (error);
}
@@ -4960,6 +4882,8 @@
new->fa_advice = advice;
new->fa_start = offset;
new->fa_end = end;
+ new->fa_prevstart = 0;
+ new->fa_prevend = 0;
fp->f_advice = new;
new = fa;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/kern/vfs_vnops.c
--- a/head/sys/kern/vfs_vnops.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/kern/vfs_vnops.c Wed Jul 25 16:40:53 2012 +0300
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/vfs_vnops.c 232701 2012-03-08 20:27:20Z jhb $");
+__FBSDID("$FreeBSD: head/sys/kern/vfs_vnops.c 238029 2012-07-02 21:01:03Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -56,6 +56,7 @@
#include <sys/filio.h>
#include <sys/resourcevar.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>
@@ -65,10 +66,15 @@
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
static fo_rdwr_t vn_read;
static fo_rdwr_t vn_write;
+static fo_rdwr_t vn_io_fault;
static fo_truncate_t vn_truncate;
static fo_ioctl_t vn_ioctl;
static fo_poll_t vn_poll;
@@ -77,8 +83,8 @@
static fo_close_t vn_closefile;
struct fileops vnops = {
- .fo_read = vn_read,
- .fo_write = vn_write,
+ .fo_read = vn_io_fault,
+ .fo_write = vn_io_fault,
.fo_truncate = vn_truncate,
.fo_ioctl = vn_ioctl,
.fo_poll = vn_poll,
@@ -102,7 +108,8 @@
}
/*
- * Common code for vnode open operations.
+ * Common code for vnode open operations via a name lookup.
+ * Lookup the vnode and invoke VOP_CREATE if needed.
* Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
*
* Note that this does NOT free nameidata for the successful case,
@@ -118,7 +125,6 @@
struct vattr vat;
struct vattr *vap = &vat;
int fmode, error;
- accmode_t accmode;
int vfslocked, mpsafe;
mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
@@ -199,24 +205,44 @@
vfslocked = NDHASGIANT(ndp);
vp = ndp->ni_vp;
}
- if (vp->v_type == VLNK) {
- error = EMLINK;
+ error = vn_open_vnode(vp, fmode, cred, td, fp);
+ if (error)
goto bad;
- }
- if (vp->v_type == VSOCK) {
- error = EOPNOTSUPP;
- goto bad;
- }
- if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
- error = ENOTDIR;
- goto bad;
- }
+ *flagp = fmode;
+ if (!mpsafe)
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (0);
+bad:
+ NDFREE(ndp, NDF_ONLY_PNBUF);
+ vput(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ *flagp = fmode;
+ ndp->ni_vp = NULL;
+ return (error);
+}
+
+/*
+ * Common code for vnode open operations once a vnode is located.
+ * Check permissions, and call the VOP_OPEN routine.
+ */
+int
+vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
+ struct thread *td, struct file *fp)
+{
+ accmode_t accmode;
+ int error;
+
+ VFS_ASSERT_GIANT(vp->v_mount);
+ if (vp->v_type == VLNK)
+ return (EMLINK);
+ if (vp->v_type == VSOCK)
+ return (EOPNOTSUPP);
+ if (vp->v_type != VDIR && fmode & O_DIRECTORY)
+ return (ENOTDIR);
accmode = 0;
if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR) {
- error = EISDIR;
- goto bad;
- }
+ if (vp->v_type == VDIR)
+ return (EISDIR);
accmode |= VWRITE;
}
if (fmode & FREAD)
@@ -228,40 +254,30 @@
#ifdef MAC
error = mac_vnode_check_open(cred, vp, accmode);
if (error)
- goto bad;
+ return (error);
#endif
if ((fmode & O_CREAT) == 0) {
if (accmode & VWRITE) {
error = vn_writechk(vp);
if (error)
- goto bad;
+ return (error);
}
if (accmode) {
error = VOP_ACCESS(vp, accmode, cred, td);
if (error)
- goto bad;
+ return (error);
}
}
if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
- goto bad;
+ return (error);
if (fmode & FWRITE) {
vp->v_writecount++;
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
__func__, vp, vp->v_writecount);
}
- *flagp = fmode;
- ASSERT_VOP_LOCKED(vp, "vn_open_cred");
- if (!mpsafe)
- VFS_UNLOCK_GIANT(vfslocked);
+ ASSERT_VOP_LOCKED(vp, "vn_open_vnode");
return (0);
-bad:
- NDFREE(ndp, NDF_ONLY_PNBUF);
- vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- *flagp = fmode;
- ndp->ni_vp = NULL;
- return (error);
}
/*
@@ -367,47 +383,19 @@
* Package up an I/O request on a vnode into a uio and do it.
*/
int
-vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
- aresid, td)
- enum uio_rw rw;
- struct vnode *vp;
- void *base;
- int len;
- off_t offset;
- enum uio_seg segflg;
- int ioflg;
- struct ucred *active_cred;
- struct ucred *file_cred;
- ssize_t *aresid;
- struct thread *td;
+vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
+ enum uio_seg segflg, int ioflg, struct ucred *active_cred,
+ struct ucred *file_cred, ssize_t *aresid, struct thread *td)
{
struct uio auio;
struct iovec aiov;
struct mount *mp;
struct ucred *cred;
+ void *rl_cookie;
int error, lock_flags;
VFS_ASSERT_GIANT(vp->v_mount);
- if ((ioflg & IO_NODELOCKED) == 0) {
- mp = NULL;
- if (rw == UIO_WRITE) {
- if (vp->v_type != VCHR &&
- (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
- != 0)
- return (error);
- if (MNT_SHARED_WRITES(mp) ||
- ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
- lock_flags = LK_SHARED;
- } else {
- lock_flags = LK_EXCLUSIVE;
- }
- vn_lock(vp, lock_flags | LK_RETRY);
- } else
- vn_lock(vp, LK_SHARED | LK_RETRY);
-
- }
- ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
aiov.iov_base = base;
@@ -418,6 +406,33 @@
auio.uio_rw = rw;
auio.uio_td = td;
error = 0;
+
+ if ((ioflg & IO_NODELOCKED) == 0) {
+ if (rw == UIO_READ) {
+ rl_cookie = vn_rangelock_rlock(vp, offset,
+ offset + len);
+ } else {
+ rl_cookie = vn_rangelock_wlock(vp, offset,
+ offset + len);
+ }
+ mp = NULL;
+ if (rw == UIO_WRITE) {
+ if (vp->v_type != VCHR &&
+ (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
+ != 0)
+ goto out;
+ if (MNT_SHARED_WRITES(mp) ||
+ ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount)))
+ lock_flags = LK_SHARED;
+ else
+ lock_flags = LK_EXCLUSIVE;
+ } else
+ lock_flags = LK_SHARED;
+ vn_lock(vp, lock_flags | LK_RETRY);
+ } else
+ rl_cookie = NULL;
+
+ ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
#ifdef MAC
if ((ioflg & IO_NOMACCHECK) == 0) {
if (rw == UIO_READ)
@@ -429,7 +444,7 @@
}
#endif
if (error == 0) {
- if (file_cred)
+ if (file_cred != NULL)
cred = file_cred;
else
cred = active_cred;
@@ -444,10 +459,13 @@
if (auio.uio_resid && error == 0)
error = EIO;
if ((ioflg & IO_NODELOCKED) == 0) {
- if (rw == UIO_WRITE && vp->v_type != VCHR)
+ VOP_UNLOCK(vp, 0);
+ if (mp != NULL)
vn_finished_write(mp);
- VOP_UNLOCK(vp, 0);
}
+ out:
+ if (rl_cookie != NULL)
+ vn_rangelock_unlock(vp, rl_cookie);
return (error);
}
@@ -509,6 +527,110 @@
return (error);
}
+off_t
+foffset_lock(struct file *fp, int flags)
+{
+ struct mtx *mtxp;
+ off_t res;
+
+ KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
+
+#if OFF_MAX <= LONG_MAX
+ /*
+ * Caller only wants the current f_offset value. Assume that
+ * reads of long and shorter integer types are atomic.
+ */
+ if ((flags & FOF_NOLOCK) != 0)
+ return (fp->f_offset);
+#endif
+
+ /*
+ * According to McKusick the vn lock was protecting f_offset here.
+ * It is now protected by the FOFFSET_LOCKED flag.
+ */
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if ((flags & FOF_NOLOCK) == 0) {
+ while (fp->f_vnread_flags & FOFFSET_LOCKED) {
+ fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
+ msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+ "vofflock", 0);
+ }
+ fp->f_vnread_flags |= FOFFSET_LOCKED;
+ }
+ res = fp->f_offset;
+ mtx_unlock(mtxp);
+ return (res);
+}
+
+void
+foffset_unlock(struct file *fp, off_t val, int flags)
+{
+ struct mtx *mtxp;
+
+ KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
+
+#if OFF_MAX <= LONG_MAX
+ if ((flags & FOF_NOLOCK) != 0) {
+ if ((flags & FOF_NOUPDATE) == 0)
+ fp->f_offset = val;
+ if ((flags & FOF_NEXTOFF) != 0)
+ fp->f_nextoff = val;
+ return;
+ }
+#endif
+
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if ((flags & FOF_NOUPDATE) == 0)
+ fp->f_offset = val;
+ if ((flags & FOF_NEXTOFF) != 0)
+ fp->f_nextoff = val;
+ if ((flags & FOF_NOLOCK) == 0) {
+ KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0,
+ ("Lost FOFFSET_LOCKED"));
+ if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
+ wakeup(&fp->f_vnread_flags);
+ fp->f_vnread_flags = 0;
+ }
+ mtx_unlock(mtxp);
+}
+
+void
+foffset_lock_uio(struct file *fp, struct uio *uio, int flags)
+{
+
+ if ((flags & FOF_OFFSET) == 0)
+ uio->uio_offset = foffset_lock(fp, flags);
+}
+
+void
+foffset_unlock_uio(struct file *fp, struct uio *uio, int flags)
+{
+
+ if ((flags & FOF_OFFSET) == 0)
+ foffset_unlock(fp, uio->uio_offset, flags);
+}
+
+static int
+get_advice(struct file *fp, struct uio *uio)
+{
+ struct mtx *mtxp;
+ int ret;
+
+ ret = POSIX_FADV_NORMAL;
+ if (fp->f_advice == NULL)
+ return (ret);
+
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if (uio->uio_offset >= fp->f_advice->fa_start &&
+ uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
+ ret = fp->f_advice->fa_advice;
+ mtx_unlock(mtxp);
+ return (ret);
+}
+
/*
* File table vnode read routine.
*/
@@ -521,44 +643,22 @@
struct thread *td;
{
struct vnode *vp;
+ struct mtx *mtxp;
int error, ioflag;
- struct mtx *mtxp;
int advice, vfslocked;
- off_t offset;
+ off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
- mtxp = NULL;
+ KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
vp = fp->f_vnode;
ioflag = 0;
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
if (fp->f_flag & O_DIRECT)
ioflag |= IO_DIRECT;
- advice = POSIX_FADV_NORMAL;
+ advice = get_advice(fp, uio);
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- /*
- * According to McKusick the vn lock was protecting f_offset here.
- * It is now protected by the FOFFSET_LOCKED flag.
- */
- if ((flags & FOF_OFFSET) == 0 || fp->f_advice != NULL) {
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if ((flags & FOF_OFFSET) == 0) {
- while (fp->f_vnread_flags & FOFFSET_LOCKED) {
- fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
- msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
- "vnread offlock", 0);
- }
- fp->f_vnread_flags |= FOFFSET_LOCKED;
- uio->uio_offset = fp->f_offset;
- }
- if (fp->f_advice != NULL &&
- uio->uio_offset >= fp->f_advice->fa_start &&
- uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
- advice = fp->f_advice->fa_advice;
- mtx_unlock(mtxp);
- }
vn_lock(vp, LK_SHARED | LK_RETRY);
switch (advice) {
@@ -578,20 +678,42 @@
if (error == 0)
#endif
error = VOP_READ(vp, uio, ioflag, fp->f_cred);
- if ((flags & FOF_OFFSET) == 0) {
- fp->f_offset = uio->uio_offset;
- mtx_lock(mtxp);
- if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
- wakeup(&fp->f_vnread_flags);
- fp->f_vnread_flags = 0;
- mtx_unlock(mtxp);
- }
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset)
- error = VOP_ADVISE(vp, offset, uio->uio_offset - 1,
- POSIX_FADV_DONTNEED);
+ offset != uio->uio_offset) {
+ /*
+ * Use POSIX_FADV_DONTNEED to flush clean pages and
+ * buffers for the backing file after a
+ * POSIX_FADV_NOREUSE read(2). To optimize the common
+ * case of using POSIX_FADV_NOREUSE with sequential
+ * access, track the previous implicit DONTNEED
+ * request and grow this request to include the
+ * current read(2) in addition to the previous
+ * DONTNEED. With purely sequential access this will
+ * cause the DONTNEED requests to continuously grow to
+ * cover all of the previously read regions of the
+ * file. This allows filesystem blocks that are
+ * accessed by multiple calls to read(2) to be flushed
+ * once the last read(2) finishes.
+ */
+ start = offset;
+ end = uio->uio_offset - 1;
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if (fp->f_advice != NULL &&
+ fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
+ if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
+ start = fp->f_advice->fa_prevstart;
+ else if (fp->f_advice->fa_prevstart != 0 &&
+ fp->f_advice->fa_prevstart == end + 1)
+ end = fp->f_advice->fa_prevend;
+ fp->f_advice->fa_prevstart = start;
+ fp->f_advice->fa_prevend = end;
+ }
+ mtx_unlock(mtxp);
+ error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
+ }
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -609,12 +731,14 @@
{
struct vnode *vp;
struct mount *mp;
+ struct mtx *mtxp;
int error, ioflag, lock_flags;
- struct mtx *mtxp;
int advice, vfslocked;
+ off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
+ KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
vp = fp->f_vnode;
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
if (vp->v_type == VREG)
@@ -633,6 +757,8 @@
if (vp->v_type != VCHR &&
(error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
goto unlock;
+
+ advice = get_advice(fp, uio);
if ((MNT_SHARED_WRITES(mp) ||
((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) &&
@@ -643,74 +769,360 @@
}
vn_lock(vp, lock_flags | LK_RETRY);
- if ((flags & FOF_OFFSET) == 0)
- uio->uio_offset = fp->f_offset;
- advice = POSIX_FADV_NORMAL;
- if (fp->f_advice != NULL) {
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- uio->uio_offset >= fp->f_advice->fa_start &&
- uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
- advice = fp->f_advice->fa_advice;
- mtx_unlock(mtxp);
- }
switch (advice) {
case POSIX_FADV_NORMAL:
case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_NOREUSE:
ioflag |= sequential_heuristic(uio, fp);
break;
case POSIX_FADV_RANDOM:
/* XXX: Is this correct? */
break;
- case POSIX_FADV_NOREUSE:
- /*
- * Request the underlying FS to discard the buffers
- * and pages after the I/O is complete.
- */
- ioflag |= IO_DIRECT;
- break;
}
+ offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
if (error == 0)
#endif
error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
- if ((flags & FOF_OFFSET) == 0)
- fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (vp->v_type != VCHR)
vn_finished_write(mp);
+ if (error == 0 && advice == POSIX_FADV_NOREUSE &&
+ offset != uio->uio_offset) {
+ /*
+ * Use POSIX_FADV_DONTNEED to flush clean pages and
+ * buffers for the backing file after a
+ * POSIX_FADV_NOREUSE write(2). To optimize the
+ * common case of using POSIX_FADV_NOREUSE with
+ * sequential access, track the previous implicit
+ * DONTNEED request and grow this request to include
+ * the current write(2) in addition to the previous
+ * DONTNEED. With purely sequential access this will
+ * cause the DONTNEED requests to continuously grow to
+ * cover all of the previously written regions of the
+ * file.
+ *
+ * Note that the blocks just written are almost
+ * certainly still dirty, so this only works when
+ * VOP_ADVISE() calls from subsequent writes push out
+ * the data written by this write(2) once the backing
+ * buffers are clean. However, as compared to forcing
+ * IO_DIRECT, this gives much saner behavior. Write
+ * clustering is still allowed, and clean pages are
+ * merely moved to the cache page queue rather than
+ * outright thrown away. This means a subsequent
+ * read(2) can still avoid hitting the disk if the
+ * pages have not been reclaimed.
+ *
+ * This does make POSIX_FADV_NOREUSE largely useless
+ * with non-sequential access. However, sequential
+ * access is the more common use case and the flag is
+ * merely advisory.
+ */
+ start = offset;
+ end = uio->uio_offset - 1;
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if (fp->f_advice != NULL &&
+ fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
+ if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
+ start = fp->f_advice->fa_prevstart;
+ else if (fp->f_advice->fa_prevstart != 0 &&
+ fp->f_advice->fa_prevstart == end + 1)
+ end = fp->f_advice->fa_prevend;
+ fp->f_advice->fa_prevstart = start;
+ fp->f_advice->fa_prevend = end;
+ }
+ mtx_unlock(mtxp);
+ error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
+ }
+
unlock:
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
+static const int io_hold_cnt = 16;
+static int vn_io_fault_enable = 1;
+SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW,
+ &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance");
+static unsigned long vn_io_faults_cnt;
+SYSCTL_LONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD,
+ &vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers");
+
+/*
+ * The vn_io_fault() is a wrapper around vn_read() and vn_write() to
+ * prevent the following deadlock:
+ *
+ * Assume that the thread A reads from the vnode vp1 into userspace
+ * buffer buf1 backed by the pages of vnode vp2. If a page in buf1 is
+ * currently not resident, then system ends up with the call chain
+ * vn_read() -> VOP_READ(vp1) -> uiomove() -> [Page Fault] ->
+ * vm_fault(buf1) -> vnode_pager_getpages(vp2) -> VOP_GETPAGES(vp2)
+ * which establishes lock order vp1->vn_lock, then vp2->vn_lock.
+ * If, at the same time, thread B reads from vnode vp2 into buffer buf2
+ * backed by the pages of vnode vp1, and some page in buf2 is not
+ * resident, we get a reversed order vp2->vn_lock, then vp1->vn_lock.
+ *
+ * To prevent the lock order reversal and deadlock, vn_io_fault() does
+ * not allow page faults to happen during VOP_READ() or VOP_WRITE().
+ * Instead, it first tries to do the whole range i/o with pagefaults
+ * disabled. If all pages in the i/o buffer are resident and mapped,
+ * VOP will succeed (ignoring the genuine filesystem errors).
+ * Otherwise, we get back EFAULT, and vn_io_fault() falls back to do
+ * i/o in chunks, with all pages in the chunk prefaulted and held
+ * using vm_fault_quick_hold_pages().
+ *
+ * Filesystems using this deadlock avoidance scheme should use the
+ * array of the held pages from uio, saved in the curthread->td_ma,
+ * instead of doing uiomove(). A helper function
+ * vn_io_fault_uiomove() converts uiomove request into
+ * uiomove_fromphys() over td_ma array.
+ *
+ * Since vnode locks do not cover the whole i/o anymore, rangelocks
+ * make the current i/o request atomic with respect to other i/os and
+ * truncations.
+ */
+static int
+vn_io_fault(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ vm_page_t ma[io_hold_cnt + 2];
+ struct uio *uio_clone, short_uio;
+ struct iovec short_iovec[1];
+ fo_rdwr_t *doio;
+ struct vnode *vp;
+ void *rl_cookie;
+ struct mount *mp;
+ vm_page_t *prev_td_ma;
+ int cnt, error, save, saveheld, prev_td_ma_cnt;
+ vm_offset_t addr, end;
+ vm_prot_t prot;
+ size_t len, resid;
+ ssize_t adv;
+
+ if (uio->uio_rw == UIO_READ)
+ doio = vn_read;
+ else
+ doio = vn_write;
+ vp = fp->f_vnode;
+ foffset_lock_uio(fp, uio, flags);
+
+ if (uio->uio_segflg != UIO_USERSPACE || vp->v_type != VREG ||
+ ((mp = vp->v_mount) != NULL &&
+ (mp->mnt_kern_flag & MNTK_NO_IOPF) == 0) ||
+ !vn_io_fault_enable) {
+ error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td);
+ goto out_last;
+ }
+
+ /*
+ * The UFS follows IO_UNIT directive and replays back both
+ * uio_offset and uio_resid if an error is encountered during the
+ * operation. But, since the iovec may be already advanced,
+ * uio is still in an inconsistent state.
+ *
+ * Cache a copy of the original uio, which is advanced to the redo
+ * point using UIO_NOCOPY below.
+ */
+ uio_clone = cloneuio(uio);
+ resid = uio->uio_resid;
+
+ short_uio.uio_segflg = UIO_USERSPACE;
+ short_uio.uio_rw = uio->uio_rw;
+ short_uio.uio_td = uio->uio_td;
+
+ if (uio->uio_rw == UIO_READ) {
+ prot = VM_PROT_WRITE;
+ rl_cookie = vn_rangelock_rlock(vp, uio->uio_offset,
+ uio->uio_offset + uio->uio_resid);
+ } else {
+ prot = VM_PROT_READ;
+ if ((fp->f_flag & O_APPEND) != 0 || (flags & FOF_OFFSET) == 0)
+ /* For appenders, punt and lock the whole range. */
+ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
+ else
+ rl_cookie = vn_rangelock_wlock(vp, uio->uio_offset,
+ uio->uio_offset + uio->uio_resid);
+ }
+
+ save = vm_fault_disable_pagefaults();
+ error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td);
+ if (error != EFAULT)
+ goto out;
+
+ atomic_add_long(&vn_io_faults_cnt, 1);
+ uio_clone->uio_segflg = UIO_NOCOPY;
+ uiomove(NULL, resid - uio->uio_resid, uio_clone);
+ uio_clone->uio_segflg = uio->uio_segflg;
+
+ saveheld = curthread_pflags_set(TDP_UIOHELD);
+ prev_td_ma = td->td_ma;
+ prev_td_ma_cnt = td->td_ma_cnt;
+
+ while (uio_clone->uio_resid != 0) {
+ len = uio_clone->uio_iov->iov_len;
+ if (len == 0) {
+ KASSERT(uio_clone->uio_iovcnt >= 1,
+ ("iovcnt underflow"));
+ uio_clone->uio_iov++;
+ uio_clone->uio_iovcnt--;
+ continue;
+ }
+
+ addr = (vm_offset_t)uio_clone->uio_iov->iov_base;
+ end = round_page(addr + len);
+ cnt = howmany(end - trunc_page(addr), PAGE_SIZE);
+ /*
+ * A perfectly misaligned address and length could cause
+ * both the start and the end of the chunk to use partial
+ * page. +2 accounts for such a situation.
+ */
+ if (cnt > io_hold_cnt + 2) {
+ len = io_hold_cnt * PAGE_SIZE;
+ KASSERT(howmany(round_page(addr + len) -
+ trunc_page(addr), PAGE_SIZE) <= io_hold_cnt + 2,
+ ("cnt overflow"));
+ }
+ cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map,
+ addr, len, prot, ma, io_hold_cnt + 2);
+ if (cnt == -1) {
+ error = EFAULT;
+ break;
+ }
+ short_uio.uio_iov = &short_iovec[0];
+ short_iovec[0].iov_base = (void *)addr;
+ short_uio.uio_iovcnt = 1;
+ short_uio.uio_resid = short_iovec[0].iov_len = len;
+ short_uio.uio_offset = uio_clone->uio_offset;
+ td->td_ma = ma;
+ td->td_ma_cnt = cnt;
+
+ error = doio(fp, &short_uio, active_cred, flags | FOF_OFFSET,
+ td);
+ vm_page_unhold_pages(ma, cnt);
+ adv = len - short_uio.uio_resid;
+
+ uio_clone->uio_iov->iov_base =
+ (char *)uio_clone->uio_iov->iov_base + adv;
+ uio_clone->uio_iov->iov_len -= adv;
+ uio_clone->uio_resid -= adv;
+ uio_clone->uio_offset += adv;
+
+ uio->uio_resid -= adv;
+ uio->uio_offset += adv;
+
+ if (error != 0 || adv == 0)
+ break;
+ }
+ td->td_ma = prev_td_ma;
+ td->td_ma_cnt = prev_td_ma_cnt;
+ curthread_pflags_restore(saveheld);
+out:
+ vm_fault_enable_pagefaults(save);
+ vn_rangelock_unlock(vp, rl_cookie);
+ free(uio_clone, M_IOV);
+out_last:
+ foffset_unlock_uio(fp, uio, flags);
+ return (error);
+}
+
+/*
+ * Helper function to perform the requested uiomove operation using
+ * the held pages for io->uio_iov[0].iov_base buffer instead of
+ * copyin/copyout. Access to the pages with uiomove_fromphys()
+ * instead of iov_base prevents page faults that could occur due to
+ * pmap_collect() invalidating the mapping created by
+ * vm_fault_quick_hold_pages(), or pageout daemon, page laundry or
+ * object cleanup revoking the write access from page mappings.
+ *
+ * Filesystems specified MNTK_NO_IOPF shall use vn_io_fault_uiomove()
+ * instead of plain uiomove().
+ */
+int
+vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio)
+{
+ struct uio transp_uio;
+ struct iovec transp_iov[1];
+ struct thread *td;
+ size_t adv;
+ int error, pgadv;
+
+ td = curthread;
+ if ((td->td_pflags & TDP_UIOHELD) == 0 ||
+ uio->uio_segflg != UIO_USERSPACE)
+ return (uiomove(data, xfersize, uio));
+
+ KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt));
+ transp_iov[0].iov_base = data;
+ transp_uio.uio_iov = &transp_iov[0];
+ transp_uio.uio_iovcnt = 1;
+ if (xfersize > uio->uio_resid)
+ xfersize = uio->uio_resid;
+ transp_uio.uio_resid = transp_iov[0].iov_len = xfersize;
+ transp_uio.uio_offset = 0;
+ transp_uio.uio_segflg = UIO_SYSSPACE;
+ /*
+ * Since transp_iov points to data, and td_ma page array
+ * corresponds to original uio->uio_iov, we need to invert the
+ * direction of the i/o operation as passed to
+ * uiomove_fromphys().
+ */
+ switch (uio->uio_rw) {
+ case UIO_WRITE:
+ transp_uio.uio_rw = UIO_READ;
+ break;
+ case UIO_READ:
+ transp_uio.uio_rw = UIO_WRITE;
+ break;
+ }
+ transp_uio.uio_td = uio->uio_td;
+ error = uiomove_fromphys(td->td_ma,
+ ((vm_offset_t)uio->uio_iov->iov_base) & PAGE_MASK,
+ xfersize, &transp_uio);
+ adv = xfersize - transp_uio.uio_resid;
+ pgadv =
+ (((vm_offset_t)uio->uio_iov->iov_base + adv) >> PAGE_SHIFT) -
+ (((vm_offset_t)uio->uio_iov->iov_base) >> PAGE_SHIFT);
+ td->td_ma += pgadv;
+ KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt,
+ pgadv));
+ td->td_ma_cnt -= pgadv;
+ uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + adv;
+ uio->uio_iov->iov_len -= adv;
+ uio->uio_resid -= adv;
+ uio->uio_offset += adv;
+ return (error);
+}
+
/*
* File table truncate routine.
*/
static int
-vn_truncate(fp, length, active_cred, td)
- struct file *fp;
- off_t length;
- struct ucred *active_cred;
- struct thread *td;
+vn_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td)
{
struct vattr vattr;
struct mount *mp;
struct vnode *vp;
+ void *rl_cookie;
int vfslocked;
int error;
vp = fp->f_vnode;
+
+ /*
+ * Lock the whole range for truncation. Otherwise split i/o
+ * might happen partly before and partly after the truncation.
+ */
+ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
- if (error) {
- VFS_UNLOCK_GIANT(vfslocked);
- return (error);
- }
+ if (error)
+ goto out1;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_type == VDIR) {
error = EISDIR;
@@ -730,7 +1142,9 @@
out:
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
+out1:
VFS_UNLOCK_GIANT(vfslocked);
+ vn_rangelock_unlock(vp, rl_cookie);
return (error);
}
@@ -1466,3 +1880,56 @@
vm_object_page_remove(object, start, end, 0);
VM_OBJECT_UNLOCK(object);
}
+
+int
+vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
+{
+ struct vattr va;
+ daddr_t bn, bnp;
+ uint64_t bsize;
+ off_t noff;
+ int error;
+
+ KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
+ ("Wrong command %lu", cmd));
+
+ if (vn_lock(vp, LK_SHARED) != 0)
+ return (EBADF);
+ if (vp->v_type != VREG) {
+ error = ENOTTY;
+ goto unlock;
+ }
+ error = VOP_GETATTR(vp, &va, cred);
+ if (error != 0)
+ goto unlock;
+ noff = *off;
+ if (noff >= va.va_size) {
+ error = ENXIO;
+ goto unlock;
+ }
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize) {
+ error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL);
+ if (error == EOPNOTSUPP) {
+ error = ENOTTY;
+ goto unlock;
+ }
+ if ((bnp == -1 && cmd == FIOSEEKHOLE) ||
+ (bnp != -1 && cmd == FIOSEEKDATA)) {
+ noff = bn * bsize;
+ if (noff < *off)
+ noff = *off;
+ goto unlock;
+ }
+ }
+ if (noff > va.va_size)
+ noff = va.va_size;
+ /* noff == va.va_size. There is an implicit hole at the end of file. */
+ if (cmd == FIOSEEKDATA)
+ error = ENXIO;
+unlock:
+ VOP_UNLOCK(vp, 0);
+ if (error == 0)
+ *off = noff;
+ return (error);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/icmp_var.h
--- a/head/sys/netinet/icmp_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/icmp_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD$
+ * $FreeBSD: head/sys/netinet/icmp_var.h 237230 2012-06-18 17:11:24Z tuexen $
*/
#ifndef _NETINET_ICMP_VAR_H_
@@ -102,7 +102,8 @@
#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
#define BANDLIM_ICMP6_UNREACH 5
-#define BANDLIM_MAX 5
+#define BANDLIM_SCTP_OOTB 6
+#define BANDLIM_MAX 6
#endif
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/if_ether.c
--- a/head/sys/netinet/if_ether.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/if_ether.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/if_ether.c 230442 2012-01-22 02:13:19Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/if_ether.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_inet.h"
@@ -180,6 +180,17 @@
callout_active(&lle->la_timer)) {
callout_stop(&lle->la_timer);
LLE_REMREF(lle);
+
+ if (lle->la_flags != LLE_DELETED) {
+ int evt;
+
+ if (lle->la_flags & LLE_VALID)
+ evt = LLENTRY_EXPIRED;
+ else
+ evt = LLENTRY_TIMEDOUT;
+ EVENTHANDLER_INVOKE(lle_event, lle, evt);
+ }
+
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
ARPSTAT_INC(timeouts);
@@ -726,7 +737,7 @@
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(arp_update_event, la);
+ EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
if (!(la->la_flags & LLE_STATIC)) {
int canceled;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/if_ether.h
--- a/head/sys/netinet/if_ether.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/if_ether.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)if_ether.h 8.3 (Berkeley) 5/2/95
- * $FreeBSD: head/sys/netinet/if_ether.h 229810 2012-01-08 13:34:00Z glebius $
+ * $FreeBSD: head/sys/netinet/if_ether.h 237263 2012-06-19 07:34:13Z np $
*/
#ifndef _NETINET_IF_ETHER_H_
@@ -122,8 +122,14 @@
void arp_ifscrub(struct ifnet *, uint32_t);
#include <sys/eventhandler.h>
-typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
-EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
+enum {
+ LLENTRY_RESOLVED,
+ LLENTRY_TIMEDOUT,
+ LLENTRY_DELETED,
+ LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/igmp.c
--- a/head/sys/netinet/igmp.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/igmp.c Wed Jul 25 16:40:53 2012 +0300
@@ -48,7 +48,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/igmp.c 229621 2012-01-05 19:00:36Z jhb $");
+__FBSDID("$FreeBSD: head/sys/netinet/igmp.c 238084 2012-07-03 19:04:18Z trociny $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -2285,13 +2285,11 @@
*/
KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
ifp = inm->inm_ifma->ifma_ifp;
- if (ifp != NULL) {
- /*
- * Sanity check that netinet's notion of ifp is the
- * same as net's.
- */
- KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
- }
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
IGMP_LOCK();
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/in.c
--- a/head/sys/netinet/in.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/in.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/in.c 234087 2012-04-10 06:52:39Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/in.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_mpath.h"
@@ -1469,7 +1469,7 @@
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
lle->la_flags = LLE_DELETED;
- EVENTHANDLER_INVOKE(arp_update_event, lle);
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
LLE_WUNLOCK(lle);
#ifdef DIAGNOSTIC
log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/in.h
--- a/head/sys/netinet/in.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/in.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
- * $FreeBSD: head/sys/netinet/in.h 226402 2011-10-15 18:41:25Z glebius $
+ * $FreeBSD: head/sys/netinet/in.h 236959 2012-06-12 14:02:38Z tuexen $
*/
#ifndef _NETINET_IN_H_
@@ -241,6 +241,7 @@
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_PGM 113 /* PGM */
+#define IPPROTO_MPLS 137 /* MPLS-in-IP */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
/* 255: Reserved */
/* BSD Private, local use, namespace incursion, no longer used */
@@ -461,6 +462,7 @@
#define IP_RECVTTL 65 /* bool; receive IP TTL w/dgram */
#define IP_MINTTL 66 /* minimum TTL for packet or drop */
#define IP_DONTFRAG 67 /* don't fragment packet */
+#define IP_RECVTOS 68 /* bool; receive IP TOS w/dgram */
/* IPv4 Source Filter Multicast API [RFC3678] */
#define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/in_pcb.c
--- a/head/sys/netinet/in_pcb.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/in_pcb.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/in_pcb.c 230442 2012-01-22 02:13:19Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/in_pcb.c 236959 2012-06-12 14:02:38Z tuexen $");
#include "opt_ddb.h"
#include "opt_ipsec.h"
@@ -2295,6 +2295,10 @@
db_printf("%sINP_DONTFRAG", comma ? ", " : "");
comma = 1;
}
+ if (inp_flags & INP_RECVTOS) {
+ db_printf("%sINP_RECVTOS", comma ? ", " : "");
+ comma = 1;
+ }
if (inp_flags & IN6P_IPV6_V6ONLY) {
db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
comma = 1;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/in_pcb.h
--- a/head/sys/netinet/in_pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/in_pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: head/sys/netinet/in_pcb.h 233096 2012-03-17 21:51:39Z rmh $
+ * $FreeBSD: head/sys/netinet/in_pcb.h 236959 2012-06-12 14:02:38Z tuexen $
*/
#ifndef _NETINET_IN_PCB_H_
@@ -509,6 +509,7 @@
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
#define INP_BINDANY 0x00001000 /* allow bind to any address */
#define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */
+#define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */
#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */
#define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */
#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */
@@ -528,7 +529,7 @@
#define IN6P_MTU 0x80000000 /* receive path MTU */
#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
- INP_RECVIF|INP_RECVTTL|\
+ INP_RECVIF|INP_RECVTTL|INP_RECVTOS|\
IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/in_var.h
--- a/head/sys/netinet/in_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/in_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: head/sys/netinet/in_var.h 229815 2012-01-08 17:20:29Z glebius $
+ * $FreeBSD: head/sys/netinet/in_var.h 238572 2012-07-18 08:41:00Z glebius $
*/
#ifndef _NETINET_IN_VAR_H_
@@ -161,14 +161,16 @@
#define IFP_TO_IA(ifp, ia) \
/* struct ifnet *ifp; */ \
/* struct in_ifaddr *ia; */ \
-{ \
+do { \
+ IN_IFADDR_RLOCK(); \
for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \
(ia) != NULL && (ia)->ia_ifp != (ifp); \
(ia) = TAILQ_NEXT((ia), ia_link)) \
continue; \
if ((ia) != NULL) \
ifa_ref(&(ia)->ia_ifa); \
-}
+ IN_IFADDR_RUNLOCK(); \
+} while (0)
#endif
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip.h
--- a/head/sys/netinet/ip.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip.h Wed Jul 25 16:40:53 2012 +0300
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ip.h 8.2 (Berkeley) 6/1/94
- * $FreeBSD$
+ * $FreeBSD: head/sys/netinet/ip.h 235036 2012-05-04 21:00:32Z delphij $
*/
#ifndef _NETINET_IP_H_
@@ -92,6 +92,31 @@
#define IPTOS_PREC_ROUTINE 0x00
/*
+ * Definitions for DiffServ Codepoints as per RFC2474
+ */
+#define IPTOS_DSCP_CS0 0x00
+#define IPTOS_DSCP_CS1 0x20
+#define IPTOS_DSCP_AF11 0x28
+#define IPTOS_DSCP_AF12 0x30
+#define IPTOS_DSCP_AF13 0x38
+#define IPTOS_DSCP_CS2 0x40
+#define IPTOS_DSCP_AF21 0x48
+#define IPTOS_DSCP_AF22 0x50
+#define IPTOS_DSCP_AF23 0x58
+#define IPTOS_DSCP_CS3 0x60
+#define IPTOS_DSCP_AF31 0x68
+#define IPTOS_DSCP_AF32 0x70
+#define IPTOS_DSCP_AF33 0x78
+#define IPTOS_DSCP_CS4 0x80
+#define IPTOS_DSCP_AF41 0x88
+#define IPTOS_DSCP_AF42 0x90
+#define IPTOS_DSCP_AF43 0x98
+#define IPTOS_DSCP_CS5 0xa0
+#define IPTOS_DSCP_EF 0xb8
+#define IPTOS_DSCP_CS6 0xc0
+#define IPTOS_DSCP_CS7 0xe0
+
+/*
* ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the
* lower 2 bits of the TOS field.
*/
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_carp.c
--- a/head/sys/netinet/ip_carp.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_carp.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ip_carp.c 234130 2012-04-11 12:26:30Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/ip_carp.c 236310 2012-05-30 13:51:00Z glebius $");
#include "opt_bpf.h"
#include "opt_inet.h"
@@ -696,7 +696,7 @@
CARPSTATS_INC(carps_onomem);
return (ENOMEM);
}
- bcopy(&sc, (caddr_t)(mtag + 1), sizeof(struct carp_softc *));
+ bcopy(&sc, mtag + 1, sizeof(sc));
m_tag_prepend(m, mtag);
return (0);
@@ -1061,13 +1061,12 @@
IF_ADDR_RUNLOCK(ifp);
mtag = m_tag_get(PACKET_TAG_CARP,
- sizeof(struct ifnet *), M_NOWAIT);
+ sizeof(struct carp_softc *), M_NOWAIT);
if (mtag == NULL)
/* Better a bit than nothing. */
return (LLADDR(&sc->sc_addr));
- bcopy(&ifp, (caddr_t)(mtag + 1),
- sizeof(struct ifnet *));
+ bcopy(&sc, mtag + 1, sizeof(sc));
m_tag_prepend(m, mtag);
return (LLADDR(&sc->sc_addr));
@@ -1391,7 +1390,7 @@
if (mtag == NULL)
return (0);
- bcopy(mtag + 1, &sc, sizeof(struct carp_softc *));
+ bcopy(mtag + 1, &sc, sizeof(sc));
/* Set the source MAC address to the Virtual Router MAC Address. */
switch (ifp->if_type) {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_fw.h
--- a/head/sys/netinet/ip_fw.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_fw.h Wed Jul 25 16:40:53 2012 +0300
@@ -22,7 +22,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/netinet/ip_fw.h 233478 2012-03-25 20:37:59Z melifaro $
+ * $FreeBSD: head/sys/netinet/ip_fw.h 234946 2012-05-03 08:56:43Z melifaro $
*/
#ifndef _IPFW2_H
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_icmp.c
--- a/head/sys/netinet/ip_icmp.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_icmp.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ip_icmp.c 229749 2012-01-07 00:11:36Z eadler $");
+__FBSDID("$FreeBSD: head/sys/netinet/ip_icmp.c 237230 2012-06-18 17:11:24Z tuexen $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -965,7 +965,8 @@
{ "icmp tstamp response" },
{ "closed port RST response" },
{ "open port RST response" },
- { "icmp6 unreach response" }
+ { "icmp6 unreach response" },
+ { "sctp ootb response" }
};
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_input.c
--- a/head/sys/netinet/ip_input.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_input.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ip_input.c 229621 2012-01-05 19:00:36Z jhb $");
+__FBSDID("$FreeBSD: head/sys/netinet/ip_input.c 238092 2012-07-04 07:37:53Z glebius $");
#include "opt_bootp.h"
#include "opt_ipfw.h"
@@ -1495,8 +1495,7 @@
if (error == EMSGSIZE && ro.ro_rt)
mtu = ro.ro_rt->rt_rmx.rmx_mtu;
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
+ RO_RTFREE(&ro);
if (error)
IPSTAT_INC(ips_cantforward);
@@ -1684,6 +1683,12 @@
if (*mp)
mp = &(*mp)->m_next;
}
+ if (inp->inp_flags & INP_RECVTOS) {
+ *mp = sbcreatecontrol((caddr_t) &ip->ip_tos,
+ sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_mroute.c
--- a/head/sys/netinet/ip_mroute.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_mroute.c Wed Jul 25 16:40:53 2012 +0300
@@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ip_mroute.c 232517 2012-03-04 18:59:38Z zec $");
+__FBSDID("$FreeBSD: head/sys/netinet/ip_mroute.c 238016 2012-07-02 19:44:18Z glebius $");
#include "opt_inet.h"
#include "opt_mrouting.h"
@@ -924,7 +924,6 @@
vifp->v_pkt_out = 0;
vifp->v_bytes_in = 0;
vifp->v_bytes_out = 0;
- bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
if (V_numvifs <= vifcp->vifc_vifi)
@@ -1702,7 +1701,7 @@
* should get rejected because they appear to come from
* the loopback interface, thus preventing looping.
*/
- error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
+ error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
(ptrdiff_t)(vifp - V_viftable), error);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_mroute.h
--- a/head/sys/netinet/ip_mroute.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_mroute.h Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD$
+ * $FreeBSD: head/sys/netinet/ip_mroute.h 238016 2012-07-02 19:44:18Z glebius $
*/
#ifndef _NETINET_IP_MROUTE_H_
@@ -262,7 +262,6 @@
u_long v_pkt_out; /* # pkts out on interface */
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
- struct route v_route; /* cached route */
};
#ifdef _KERNEL
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ip_output.c
--- a/head/sys/netinet/ip_output.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ip_output.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 227207 2011-11-06 10:47:20Z trociny $");
+__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 238573 2012-07-18 08:58:30Z glebius $");
#include "opt_ipfw.h"
#include "opt_ipsec.h"
@@ -105,6 +105,10 @@
* ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
+ * If route ro is present and has ro_rt initialized, route lookup would be
+ * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
+ * then result of route lookup is stored in ro->ro_rt.
+ *
* In the IP forwarding case, the packet will arrive with options already
* inserted, so must have a NULL opt pointer.
*/
@@ -119,9 +123,8 @@
int mtu;
int n; /* scratchpad */
int error = 0;
- int nortfree = 0;
struct sockaddr_in *dst;
- struct in_ifaddr *ia = NULL;
+ struct in_ifaddr *ia;
int isbroadcast, sw_csum;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
@@ -146,24 +149,23 @@
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
+ }
#ifdef FLOWTABLE
- {
- struct flentry *fle;
+ if (ro->ro_rt == NULL) {
+ struct flentry *fle;
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
- flow_to_route(fle, ro);
- nortfree = 1;
- }
- }
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that long for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
+ if (fle != NULL)
+ flow_to_route(fle, ro);
+ }
#endif
- }
if (opt) {
int len = 0;
@@ -196,6 +198,7 @@
dst = (struct sockaddr_in *)&ro->ro_dst;
again:
+ ia = NULL;
/*
* If there is a cached route,
* check that it is to the same destination
@@ -209,10 +212,9 @@
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- if (!nortfree)
- RTFREE(rte);
- rte = ro->ro_rt = (struct rtentry *)NULL;
- ro->ro_lle = (struct llentry *)NULL;
+ RO_RTFREE(ro);
+ ro->ro_lle = NULL;
+ rte = NULL;
}
#ifdef IPFIREWALL_FORWARD
if (rte == NULL && fwd_tag == NULL) {
@@ -532,8 +534,11 @@
#endif
error = netisr_queue(NETISR_IP, m);
goto done;
- } else
+ } else {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
goto again; /* Redo the routing table lookup. */
+ }
}
#ifdef IPFIREWALL_FORWARD
@@ -563,6 +568,8 @@
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
m->m_flags |= M_SKIP_FIREWALL;
m_tag_delete(m, fwd_tag);
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
goto again;
}
#endif /* IPFIREWALL_FORWARD */
@@ -672,9 +679,8 @@
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute && ro->ro_rt && !nortfree) {
- RTFREE(ro->ro_rt);
- }
+ if (ro == &iproute)
+ RO_RTFREE(ro);
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
@@ -984,6 +990,7 @@
case IP_FAITH:
case IP_ONESBCAST:
case IP_DONTFRAG:
+ case IP_RECVTOS:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
@@ -1047,6 +1054,9 @@
case IP_BINDANY:
OPTSET(INP_BINDANY);
break;
+ case IP_RECVTOS:
+ OPTSET(INP_RECVTOS);
+ break;
}
break;
#undef OPTSET
@@ -1156,6 +1166,7 @@
case IP_ONESBCAST:
case IP_DONTFRAG:
case IP_BINDANY:
+ case IP_RECVTOS:
switch (sopt->sopt_name) {
case IP_TOS:
@@ -1214,6 +1225,9 @@
case IP_BINDANY:
optval = OPTBIT(INP_BINDANY);
break;
+ case IP_RECVTOS:
+ optval = OPTBIT(INP_RECVTOS);
+ break;
}
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ipfw/ip_dummynet.c
--- a/head/sys/netinet/ipfw/ip_dummynet.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ipfw/ip_dummynet.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 222560 2011-06-01 12:33:05Z ae $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 238063 2012-07-03 08:42:48Z issyl0 $");
/*
* Configuration and internal object management for dummynet.
@@ -97,7 +97,7 @@
struct dn_alg *d;
SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
- if (d->type == type || (name && !strcmp(d->name, name)))
+ if (d->type == type || (name && !strcasecmp(d->name, name)))
return d;
}
return NULL; /* not found */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ipfw/ip_fw_log.c
--- a/head/sys/netinet/ipfw/ip_fw_log.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ipfw/ip_fw_log.c Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 227085 2011-11-04 16:24:19Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 238277 2012-07-09 07:16:19Z hrs $");
/*
* Logging support for ipfw
@@ -44,8 +44,11 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
+#include <net/if_clone.h>
#include <net/vnet.h>
#include <net/if_types.h> /* for IFT_ETHER */
#include <net/bpf.h> /* for BPF */
@@ -90,6 +93,15 @@
}
#else /* !WITHOUT_BPF */
static struct ifnet *log_if; /* hook to attach to bpf */
+static struct rwlock log_if_lock;
+#define LOGIF_LOCK_INIT(x) rw_init(&log_if_lock, "ipfw log_if lock")
+#define LOGIF_LOCK_DESTROY(x) rw_destroy(&log_if_lock)
+#define LOGIF_RLOCK(x) rw_rlock(&log_if_lock)
+#define LOGIF_RUNLOCK(x) rw_runlock(&log_if_lock)
+#define LOGIF_WLOCK(x) rw_wlock(&log_if_lock)
+#define LOGIF_WUNLOCK(x) rw_wunlock(&log_if_lock)
+
+#define IPFWNAME "ipfw"
/* we use this dummy function for all ifnet callbacks */
static int
@@ -116,37 +128,105 @@
static const u_char ipfwbroadcastaddr[6] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+static int
+ipfw_log_clone_match(struct if_clone *ifc, const char *name)
+{
+
+ return (strncmp(name, IPFWNAME, sizeof(IPFWNAME) - 1) == 0);
+}
+
+static int
+ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
+ caddr_t params)
+{
+ int error;
+ int unit;
+ struct ifnet *ifp;
+
+ error = ifc_name2unit(name, &unit);
+ if (error)
+ return (error);
+
+ error = ifc_alloc_unit(ifc, &unit);
+ if (error)
+ return (error);
+
+ ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+ ifp->if_dname = IPFWNAME;
+ ifp->if_dunit = unit;
+ snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", IPFWNAME, unit);
+ strlcpy(name, ifp->if_xname, len);
+ ifp->if_mtu = 65536;
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = (void *)log_dummy;
+ ifp->if_ioctl = log_dummy;
+ ifp->if_start = ipfw_log_start;
+ ifp->if_output = ipfw_log_output;
+ ifp->if_addrlen = 6;
+ ifp->if_hdrlen = 14;
+ ifp->if_broadcastaddr = ipfwbroadcastaddr;
+ ifp->if_baudrate = IF_Mbps(10);
+
+ LOGIF_WLOCK();
+ if (log_if == NULL)
+ log_if = ifp;
+ else {
+ LOGIF_WUNLOCK();
+ if_free(ifp);
+ ifc_free_unit(ifc, unit);
+ return (EEXIST);
+ }
+ LOGIF_WUNLOCK();
+ if_attach(ifp);
+ bpfattach(ifp, DLT_EN10MB, 14);
+
+ return (0);
+}
+
+static int
+ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ int unit;
+
+ if (ifp == NULL)
+ return (0);
+
+ LOGIF_WLOCK();
+ if (log_if != NULL && ifp == log_if)
+ log_if = NULL;
+ else {
+ LOGIF_WUNLOCK();
+ return (EINVAL);
+ }
+ LOGIF_WUNLOCK();
+
+ unit = ifp->if_dunit;
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ ifc_free_unit(ifc, unit);
+
+ return (0);
+}
+
+static struct if_clone ipfw_log_cloner = IFC_CLONE_INITIALIZER(
+ IPFWNAME, NULL, IF_MAXUNIT,
+ NULL, ipfw_log_clone_match, ipfw_log_clone_create, ipfw_log_clone_destroy);
+
void
ipfw_log_bpf(int onoff)
{
- struct ifnet *ifp;
if (onoff) {
- if (log_if)
- return;
- ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- return;
- if_initname(ifp, "ipfw", 0);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- if_attach(ifp);
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
- bpfattach(ifp, DLT_EN10MB, 14);
- log_if = ifp;
+ LOGIF_LOCK_INIT();
+ if_clone_attach(&ipfw_log_cloner);
} else {
- if (log_if) {
- ether_ifdetach(log_if);
- if_free(log_if);
- }
- log_if = NULL;
+ if_clone_detach(&ipfw_log_cloner);
+ LOGIF_LOCK_DESTROY();
}
}
#endif /* !WITHOUT_BPF */
@@ -166,9 +246,11 @@
if (V_fw_verbose == 0) {
#ifndef WITHOUT_BPF
-
- if (log_if == NULL || log_if->if_bpf == NULL)
+ LOGIF_RLOCK();
+ if (log_if == NULL || log_if->if_bpf == NULL) {
+ LOGIF_RUNLOCK();
return;
+ }
if (args->eh) /* layer2, use orig hdr */
BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
@@ -177,6 +259,7 @@
* more info in the header.
*/
BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+ LOGIF_RUNLOCK();
#endif /* !WITHOUT_BPF */
return;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ipfw/ip_fw_private.h
--- a/head/sys/netinet/ipfw/ip_fw_private.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ipfw/ip_fw_private.h Wed Jul 25 16:40:53 2012 +0300
@@ -22,7 +22,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/netinet/ipfw/ip_fw_private.h 233478 2012-03-25 20:37:59Z melifaro $
+ * $FreeBSD: head/sys/netinet/ipfw/ip_fw_private.h 234946 2012-05-03 08:56:43Z melifaro $
*/
#ifndef _IPFW2_PRIVATE_H
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/ipfw/ip_fw_table.c
--- a/head/sys/netinet/ipfw/ip_fw_table.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/ipfw/ip_fw_table.c Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_table.c 233478 2012-03-25 20:37:59Z melifaro $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_table.c 238265 2012-07-08 21:13:04Z melifaro $");
/*
* Lookup table support for ipfw
@@ -153,6 +153,9 @@
case IPFW_TABLE_CIDR:
if (plen == sizeof(in_addr_t)) {
#ifdef INET
+ /* IPv4 case */
+ if (mlen > 32)
+ return (EINVAL);
ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
ent->value = value;
/* Set 'total' structure length */
@@ -341,9 +344,12 @@
struct xaddr_iface ifname, ifmask;
memset(&ifname, 0, sizeof(ifname));
+ /* Include last \0 into comparison */
+ mlen++;
+
/* Set 'total' structure length */
- KEY_LEN(ifname) = mlen;
- KEY_LEN(ifmask) = mlen;
+ KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
+ KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
/* Assume direct match */
/* FIXME: Add interface pattern matching */
#if 0
@@ -565,7 +571,8 @@
break;
case IPFW_TABLE_INTERFACE:
- KEY_LEN(iface) = strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE);
+ KEY_LEN(iface) = KEY_LEN_IFACE +
+ strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
/* Assume direct match */
/* FIXME: Add interface pattern matching */
xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/libalias/alias_sctp.h
--- a/head/sys/netinet/libalias/alias_sctp.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/libalias/alias_sctp.h Wed Jul 25 16:40:53 2012 +0300
@@ -45,7 +45,7 @@
*
*/
-/* $FreeBSD: head/sys/netinet/libalias/alias_sctp.h 222809 2011-06-07 06:57:22Z ae $ */
+/* $FreeBSD: head/sys/netinet/libalias/alias_sctp.h 235644 2012-05-19 05:14:24Z marcel $ */
#ifndef _ALIAS_SCTP_H_
#define _ALIAS_SCTP_H_
@@ -92,7 +92,6 @@
#ifndef _KERNEL
#include <stdlib.h>
#include <stdio.h>
-#include <curses.h>
#endif //#ifdef _KERNEL
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/libalias/libalias.3
--- a/head/sys/netinet/libalias/libalias.3 Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/libalias/libalias.3 Wed Jul 25 16:40:53 2012 +0300
@@ -23,9 +23,9 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $FreeBSD: head/sys/netinet/libalias/libalias.3 223773 2011-07-04 23:00:26Z gjb $
+.\" $FreeBSD: head/sys/netinet/libalias/libalias.3 237015 2012-06-13 18:57:27Z joel $
.\"
-.Dd July 04, 2011
+.Dd July 4, 2011
.Dt LIBALIAS 3
.Os
.Sh NAME
@@ -201,11 +201,10 @@
If this mode bit is set, traffic on the local network which does not
originate from unregistered address spaces will be ignored.
Standard Class A, B and C unregistered addresses are:
-.Bd -literal -offset indent
+.Pp
10.0.0.0 -> 10.255.255.255 (Class A subnet)
172.16.0.0 -> 172.31.255.255 (Class B subnets)
192.168.0.0 -> 192.168.255.255 (Class C subnets)
-.Ed
.Pp
This option is useful in the case that the packet aliasing host has both
registered and unregistered subnets on different interfaces.
@@ -499,14 +498,13 @@
New traffic generated by any of the local machines, designated in the
several function calls, will be aliased to the same address.
Consider the following example:
-.Bd -literal -offset indent
+.Pp
LibAliasRedirectAddr(la, inet_aton("192.168.0.2"),
inet_aton("141.221.254.101"));
LibAliasRedirectAddr(la, inet_aton("192.168.0.3"),
inet_aton("141.221.254.101"));
LibAliasRedirectAddr(la, inet_aton("192.168.0.4"),
inet_aton("141.221.254.101"));
-.Ed
.Pp
Any outgoing connections such as
.Xr telnet 1
@@ -919,7 +917,7 @@
.An Paolo Pisati Aq piso at FreeBSD.org
made the library modular, moving support for all
protocols (except for IP, TCP and UDP) to external modules.
-.Sh ACKNOWLEDGMENTS
+.Sh ACKNOWLEDGEMENTS
Listed below, in approximate chronological order, are individuals who
have provided valuable comments and/or debugging assistance.
.Bd -ragged -offset indent
@@ -1277,10 +1275,10 @@
.Ed
.Bl -inset
.It Va name
-is the name of the module
+is the name of the module.
.It Va handle
is a pointer to the module obtained through
-.Xr dlopen 3
+.Xr dlopen 3 .
.El
Whenever a module is loaded in userland, an entry is added to
.Va dll_chain ,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp.h
--- a/head/sys/netinet/sctp.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -29,14 +29,14 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp.h,v 1.18 2005/03/06 16:04:16 itojun Exp $ */
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp.h 233660 2012-03-29 13:36:53Z rrs $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp.h 235990 2012-05-25 11:14:08Z tuexen $");
#ifndef _NETINET_SCTP_H_
#define _NETINET_SCTP_H_
+
#include <sys/types.h>
@@ -265,8 +265,6 @@
#define SCTP_PEELOFF 0x0000800a
/* the real worker for sctp_getaddrlen() */
#define SCTP_GET_ADDR_LEN 0x0000800b
-/* temporary workaround for Apple listen() issue, no args used */
-#define SCTP_LISTEN_FIX 0x0000800c
/* Debug things that need to be purged */
#define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00
@@ -511,35 +509,38 @@
/*
* PCB Features (in sctp_features bitmask)
*/
-#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x00000001
-#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */
-#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
-#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
-#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
-#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
-#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
-#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
+#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x00000001
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
+#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
/* socket options */
-#define SCTP_PCB_FLAGS_NODELAY 0x00000100
-#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
-#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */
-#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
-#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
-#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
-#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000
-#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
-#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
-#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
-#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
-#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
-#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
-#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
-#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000
-#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000
-#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000
-#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000
-#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000
-#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000
+#define SCTP_PCB_FLAGS_NODELAY 0x00000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
+#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000 /* deprecated */
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
+#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000
+#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000
+#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000
+#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000
+#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000
+#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000
+#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT 0x20000000
+#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x40000000
+#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x80000000
/*-
* mobility_features parameters (by micchie).Note
@@ -547,14 +548,16 @@
* sctp_mobility_features flags.. not the sctp_features
* flags.
*/
-#define SCTP_MOBILITY_BASE 0x00000001
-#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
-#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
+#define SCTP_MOBILITY_BASE 0x00000001
+#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
+#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
#define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU
* discovery */
+#undef SCTP_PACKED
+
#include <netinet/sctp_uio.h>
/* This dictates the size of the packet
@@ -606,7 +609,4 @@
#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000
#define SCTP_LOG_TRY_ADVANCE 0x10000000
-
-#undef SCTP_PACKED
-
#endif /* !_NETINET_SCTP_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_asconf.c
--- a/head/sys/netinet/sctp_asconf.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_asconf.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_asconf.c,v 1.24 2005/03/06 16:04:16 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_asconf.c 238501 2012-07-15 20:16:17Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_asconf.c 228907 2011-12-27 10:16:24Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
@@ -49,63 +48,10 @@
* SCTP_DEBUG_ASCONF1: protocol info, general info and errors
* SCTP_DEBUG_ASCONF2: detailed info
*/
-#ifdef SCTP_DEBUG
-#endif /* SCTP_DEBUG */
-static void
-sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
-{
- struct ip *iph;
-
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
-
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* IPv4 source */
- sin = (struct sockaddr_in *)sa;
- bzero(sin, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_port = 0;
- sin->sin_addr.s_addr = iph->ip_src.s_addr;
- break;
- }
-#endif
-#ifdef INET6
- case (IPV6_VERSION >> 4):
- {
- /* IPv6 source */
- struct ip6_hdr *ip6;
-
- sin6 = (struct sockaddr_in6 *)sa;
- bzero(sin6, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_port = 0;
- ip6 = mtod(m, struct ip6_hdr *);
- sin6->sin6_addr = ip6->ip6_src;
- break;
- }
-#endif /* INET6 */
- default:
- break;
- }
- return;
-}
-
/*
- * draft-ietf-tsvwg-addip-sctp
+ * RFC 5061
*
* An ASCONF parameter queue exists per asoc which holds the pending address
* operations. Lists are updated upon receipt of ASCONF-ACK.
@@ -197,12 +143,12 @@
}
static struct mbuf *
-sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int send_hb, int response_required)
{
struct sctp_nets *net;
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ struct sockaddr_storage sa_store;
struct sctp_paramhdr *ph;
uint16_t param_type, param_length, aparam_length;
struct sockaddr *sa;
@@ -282,11 +228,10 @@
/* if 0.0.0.0/::0, add the source address instead */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- sa = (struct sockaddr *)&sa_source;
- sctp_asconf_get_source_ip(m, sa);
+ sa = src;
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_asconf_add_ip: using source addr ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
}
/* add the address */
if (bad_address) {
@@ -346,11 +291,12 @@
}
static struct mbuf *
-sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_delete_ip(struct sockaddr *src,
+ struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ struct sockaddr_storage sa_store;
struct sctp_paramhdr *ph;
uint16_t param_type, param_length, aparam_length;
struct sockaddr *sa;
@@ -368,9 +314,6 @@
#endif
- /* get the source IP address for src and 0.0.0.0/::0 delete checks */
- sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
-
aparam_length = ntohs(aph->ph.param_length);
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
@@ -427,7 +370,7 @@
}
/* make sure the source address is not being deleted */
- if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+ if (sctp_cmpaddr(sa, src)) {
/* trying to delete the source address! */
SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
m_reply = sctp_asconf_error_response(aph->correlation_id,
@@ -437,8 +380,7 @@
}
/* if deleting 0.0.0.0/::0, delete all addresses except src addr */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- result = sctp_asconf_del_remote_addrs_except(stcb,
- (struct sockaddr *)&sa_source);
+ result = sctp_asconf_del_remote_addrs_except(stcb, src);
if (result) {
/* src address did not exist? */
@@ -478,12 +420,12 @@
}
static struct mbuf *
-sctp_process_asconf_set_primary(struct mbuf *m,
+sctp_process_asconf_set_primary(struct sockaddr *src,
struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ struct sockaddr_storage sa_store;
struct sctp_paramhdr *ph;
uint16_t param_type, param_length, aparam_length;
struct sockaddr *sa;
@@ -553,11 +495,10 @@
/* if 0.0.0.0/::0, use the source address instead */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- sa = (struct sockaddr *)&sa_source;
- sctp_asconf_get_source_ip(m, sa);
+ sa = src;
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_asconf_set_primary: using source addr ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
}
/* set the primary address */
if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
@@ -629,6 +570,7 @@
*/
void
sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+ struct sockaddr *src,
struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
int first)
{
@@ -765,13 +707,13 @@
switch (param_type) {
case SCTP_ADD_IP_ADDRESS:
asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+ m_result = sctp_process_asconf_add_ip(src, aph, stcb,
(cnt < SCTP_BASE_SYSCTL(sctp_hb_maxburst)), error);
cnt++;
break;
case SCTP_DEL_IP_ADDRESS:
asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+ m_result = sctp_process_asconf_delete_ip(src, aph, stcb,
error);
break;
case SCTP_ERROR_CAUSE_IND:
@@ -779,7 +721,7 @@
break;
case SCTP_SET_PRIM_ADDR:
asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_set_primary(m, aph,
+ m_result = sctp_process_asconf_set_primary(src, aph,
stcb, error);
break;
case SCTP_NAT_VTAGS:
@@ -859,70 +801,16 @@
* this could happen if the source address was just newly
* added
*/
- struct ip *iph;
- struct sctphdr *sh;
- struct sockaddr_storage from_store;
- struct sockaddr *from = (struct sockaddr *)&from_store;
-
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
- /* pullup already done, IP options already stripped */
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- struct sockaddr_in *from4;
-
- sh = (struct sctphdr *)((caddr_t)iph + sizeof(*iph));
- from4 = (struct sockaddr_in *)&from_store;
- bzero(from4, sizeof(*from4));
- from4->sin_family = AF_INET;
- from4->sin_len = sizeof(struct sockaddr_in);
- from4->sin_addr.s_addr = iph->ip_src.s_addr;
- from4->sin_port = sh->src_port;
- break;
- }
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
+ /* look up the from address */
+ stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, src);
+#ifdef SCTP_DEBUG
+ if (stcb->asoc.last_control_chunk_from == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n");
+ }
#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *from6;
-
- ip6 = mtod(m, struct ip6_hdr *);
- sh = (struct sctphdr *)((caddr_t)ip6 + sizeof(*ip6));
- from6 = (struct sockaddr_in6 *)&from_store;
- bzero(from6, sizeof(*from6));
- from6->sin6_family = AF_INET6;
- from6->sin6_len = sizeof(struct sockaddr_in6);
- from6->sin6_addr = ip6->ip6_src;
- from6->sin6_port = sh->src_port;
- /*
- * Get the scopes in properly to the sin6
- * addr's
- */
- /* we probably don't need these operations */
- (void)sa6_recoverscope(from6);
- sa6_embedscope(from6,
- MODULE_GLOBAL(ip6_use_defzone));
-
- break;
- }
-#endif
- default:
- /* unknown address type */
- from = NULL;
- }
- if (from != NULL) {
- SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, from);
- /* look up the from address */
- stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, from);
-#ifdef SCTP_DEBUG
- if (stcb->asoc.last_control_chunk_from == NULL)
- SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n");
-#endif
- }
}
}
@@ -1789,8 +1677,7 @@
*/
if (serial_num == (asoc->asconf_seq_out + 1)) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_CAUSE_ILLEGAL_ASCONF_ACK, NULL, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, SCTP_SO_NOT_LOCKED);
*abort_no_unlock = 1;
return;
}
@@ -2860,13 +2747,14 @@
struct sctp_paramhdr tmp_param, *ph;
uint16_t plen, ptype;
struct sctp_ifa *sctp_ifa;
- struct sctp_ipv6addr_param addr_store;
#ifdef INET6
+ struct sctp_ipv6addr_param addr6_store;
struct sockaddr_in6 sin6;
#endif
#ifdef INET
+ struct sctp_ipv4addr_param addr4_store;
struct sockaddr_in sin;
#endif
@@ -2915,7 +2803,7 @@
a6p = (struct sctp_ipv6addr_param *)
sctp_m_getptr(m, offset,
sizeof(struct sctp_ipv6addr_param),
- (uint8_t *) & addr_store);
+ (uint8_t *) & addr6_store);
if (plen != sizeof(struct sctp_ipv6addr_param) ||
a6p == NULL) {
return;
@@ -2934,7 +2822,7 @@
/* get the entire IPv4 address param */
a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset,
sizeof(struct sctp_ipv4addr_param),
- (uint8_t *) & addr_store);
+ (uint8_t *) & addr4_store);
if (plen != sizeof(struct sctp_ipv4addr_param) ||
a4p == NULL) {
return;
@@ -3012,16 +2900,17 @@
{
struct sctp_paramhdr tmp_param, *ph;
uint16_t plen, ptype;
- struct sctp_ipv6addr_param addr_store;
#ifdef INET
struct sockaddr_in *sin;
struct sctp_ipv4addr_param *a4p;
+ struct sctp_ipv6addr_param addr4_store;
#endif
#ifdef INET6
struct sockaddr_in6 *sin6;
struct sctp_ipv6addr_param *a6p;
+ struct sctp_ipv6addr_param addr6_store;
struct sockaddr_in6 sin6_tmp;
#endif
@@ -3067,7 +2956,7 @@
a6p = (struct sctp_ipv6addr_param *)
sctp_m_getptr(m, offset,
sizeof(struct sctp_ipv6addr_param),
- (uint8_t *) & addr_store);
+ (uint8_t *) & addr6_store);
if (a6p == NULL) {
return (0);
}
@@ -3097,7 +2986,7 @@
a4p = (struct sctp_ipv4addr_param *)
sctp_m_getptr(m, offset,
sizeof(struct sctp_ipv4addr_param),
- (uint8_t *) & addr_store);
+ (uint8_t *) & addr4_store);
if (a4p == NULL) {
return (0);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_asconf.h
--- a/head/sys/netinet/sctp_asconf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_asconf.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_asconf.h,v 1.8 2005/03/06 16:04:16 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_asconf.h 228653 2011-12-17 19:21:40Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_asconf.h 237715 2012-06-28 16:01:08Z tuexen $");
#ifndef _NETINET_SCTP_ASCONF_H_
#define _NETINET_SCTP_ASCONF_H_
@@ -48,8 +46,8 @@
extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
extern void
-sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
- struct sctp_tcb *, int i);
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sockaddr *,
+ struct sctp_asconf_chunk *, struct sctp_tcb *, int);
extern void
sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_auth.c
--- a/head/sys/netinet/sctp_auth.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_auth.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_auth.c 228907 2011-12-27 10:16:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_auth.c 235828 2012-05-23 11:26:28Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -284,16 +284,16 @@
uint32_t i;
if (key == NULL) {
- printf("%s: [Null key]\n", str);
+ SCTP_PRINTF("%s: [Null key]\n", str);
return;
}
- printf("%s: len %u, ", str, key->keylen);
+ SCTP_PRINTF("%s: len %u, ", str, key->keylen);
if (key->keylen) {
for (i = 0; i < key->keylen; i++)
- printf("%02x", key->key[i]);
- printf("\n");
+ SCTP_PRINTF("%02x", key->key[i]);
+ SCTP_PRINTF("\n");
} else {
- printf("[Null key]\n");
+ SCTP_PRINTF("[Null key]\n");
}
}
@@ -303,16 +303,16 @@
uint32_t i;
if (key == NULL) {
- printf("%s: [Null key]\n", str);
+ SCTP_PRINTF("%s: [Null key]\n", str);
return;
}
- printf("%s: len %u, ", str, key->keylen);
+ SCTP_PRINTF("%s: len %u, ", str, key->keylen);
if (key->keylen) {
for (i = 0; i < key->keylen; i++)
- printf("%02x", key->key[i]);
- printf("\n");
+ SCTP_PRINTF("%02x", key->key[i]);
+ SCTP_PRINTF("\n");
} else {
- printf("[Null key]\n");
+ SCTP_PRINTF("[Null key]\n");
}
}
@@ -1801,7 +1801,7 @@
* shared_key_id, (void
* *)stcb->asoc.authinfo.recv_keyid);
*/
- sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY,
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY,
shared_key_id, stcb->asoc.authinfo.recv_keyid,
SCTP_SO_NOT_LOCKED);
/* compute a new recv assoc key and cache it */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_auth.h
--- a/head/sys/netinet/sctp_auth.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_auth.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,10 +31,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_auth.h 228653 2011-12-17 19:21:40Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_auth.h 235828 2012-05-23 11:26:28Z tuexen $");
-#ifndef __SCTP_AUTH_H__
-#define __SCTP_AUTH_H__
+#ifndef _NETINET_SCTP_AUTH_H_
+#define _NETINET_SCTP_AUTH_H_
/* digest lengths */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_bsd_addr.c
--- a/head/sys/netinet/sctp_bsd_addr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_bsd_addr.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_bsd_addr.c 232866 2012-03-12 15:05:17Z rrs $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_bsd_addr.c 237540 2012-06-24 21:25:54Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -424,11 +422,12 @@
#ifdef SCTP_PACKET_LOGGING
void
-sctp_packet_log(struct mbuf *m, int length)
+sctp_packet_log(struct mbuf *m)
{
int *lenat, thisone;
void *copyto;
uint32_t *tick_tock;
+ int length;
int total_len;
int grabbed_lock = 0;
int value, newval, thisend, thisbegin;
@@ -438,6 +437,7 @@
* (value) -ticks of log (ticks) o -ip packet o -as logged -
* where this started (thisbegin) x <--end points here
*/
+ length = SCTP_HEADER_LEN(m);
total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
/* Log a packet to the buffer. */
if (total_len > SCTP_PACKET_LOG_SIZE) {
@@ -483,7 +483,7 @@
}
/* Sanity check */
if (thisend >= SCTP_PACKET_LOG_SIZE) {
- printf("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n",
+ SCTP_PRINTF("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n",
thisbegin,
thisend,
SCTP_BASE_VAR(packet_log_writers),
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_bsd_addr.h
--- a/head/sys/netinet/sctp_bsd_addr.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_bsd_addr.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,10 +31,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_bsd_addr.h 228653 2011-12-17 19:21:40Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_bsd_addr.h 237540 2012-06-24 21:25:54Z tuexen $");
-#ifndef __sctp_bsd_addr_h__
-#define __sctp_bsd_addr_h__
+#ifndef _NETINET_SCTP_BSD_ADDR_H_
+#define _NETINET_SCTP_BSD_ADDR_H_
+
#include <netinet/sctp_pcb.h>
#if defined(_KERNEL) || defined(__Userspace__)
@@ -52,7 +53,7 @@
#ifdef SCTP_PACKET_LOGGING
-void sctp_packet_log(struct mbuf *m, int length);
+void sctp_packet_log(struct mbuf *m);
int sctp_copy_out_packet_log(uint8_t * target, int length);
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_cc_functions.c
--- a/head/sys/netinet/sctp_cc_functions.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_cc_functions.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_cc_functions.c 235828 2012-05-23 11:26:28Z tuexen $");
+
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
@@ -44,8 +47,6 @@
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_dtrace_declare.h>
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_cc_functions.c 228907 2011-12-27 10:16:24Z tuexen $");
#define SHIFT_MPTCP_MULTI_N 40
#define SHIFT_MPTCP_MULTI_Z 16
@@ -1594,9 +1595,7 @@
cur_val = net->cwnd >> 10;
indx = SCTP_HS_TABLE_SIZE - 1;
-#ifdef SCTP_DEBUG
- printf("HS CC CAlled.\n");
-#endif
+
if (cur_val < sctp_cwnd_adjust[0].cwnd) {
/* normal mode */
if (net->net_ack > net->mtu) {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_constants.h
--- a/head/sys/netinet/sctp_constants.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_constants.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_constants.h,v 1.17 2005/03/06 16:04:17 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_constants.h 235828 2012-05-23 11:26:28Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_constants.h 233660 2012-03-29 13:36:53Z rrs $");
-
-#ifndef __sctp_constants_h__
-#define __sctp_constants_h__
+#ifndef _NETINET_SCTP_CONSTANTS_H_
+#define _NETINET_SCTP_CONSTANTS_H_
/* IANA assigned port number for SCTP over UDP encapsulation */
/* For freebsd we cannot bind the port at
@@ -348,7 +346,7 @@
#define SCTP_NO_FR_UNLESS_SEGMENT_SMALLER 1
/* default max I can burst out after a fast retransmit, 0 disables it */
-#define SCTP_DEF_MAX_BURST 0
+#define SCTP_DEF_MAX_BURST 4
#define SCTP_DEF_HBMAX_BURST 4
#define SCTP_DEF_FRMAX_BURST 4
@@ -460,18 +458,6 @@
#define SCTP_HAS_NAT_SUPPORT 0xc007
#define SCTP_NAT_VTAGS 0xc008
-/* Notification error codes */
-#define SCTP_NOTIFY_DATAGRAM_UNSENT 0x0001
-#define SCTP_NOTIFY_DATAGRAM_SENT 0x0002
-#define SCTP_FAILED_THRESHOLD 0x0004
-#define SCTP_HEARTBEAT_SUCCESS 0x0008
-#define SCTP_RESPONSE_TO_USER_REQ 0x0010
-#define SCTP_INTERNAL_ERROR 0x0020
-#define SCTP_SHUTDOWN_GUARD_EXPIRES 0x0040
-#define SCTP_RECEIVED_SACK 0x0080
-#define SCTP_PEER_FAULTY 0x0100
-#define SCTP_ICMP_REFUSED 0x0200
-
/* bits for TOS field */
#define SCTP_ECT0_BIT 0x02
#define SCTP_ECT1_BIT 0x01
@@ -755,35 +741,29 @@
#define SCTP_NOTIFY_ASSOC_DOWN 2
#define SCTP_NOTIFY_INTERFACE_DOWN 3
#define SCTP_NOTIFY_INTERFACE_UP 4
-#define SCTP_NOTIFY_DG_FAIL 5
-#define SCTP_NOTIFY_STRDATA_ERR 6
-#define SCTP_NOTIFY_ASSOC_ABORTED 7
-#define SCTP_NOTIFY_PEER_OPENED_STREAM 8
-#define SCTP_NOTIFY_STREAM_OPENED_OK 9
+#define SCTP_NOTIFY_SENT_DG_FAIL 5
+#define SCTP_NOTIFY_UNSENT_DG_FAIL 6
+#define SCTP_NOTIFY_SPECIAL_SP_FAIL 7
+#define SCTP_NOTIFY_ASSOC_LOC_ABORTED 8
+#define SCTP_NOTIFY_ASSOC_REM_ABORTED 9
#define SCTP_NOTIFY_ASSOC_RESTART 10
-#define SCTP_NOTIFY_HB_RESP 11
-#define SCTP_NOTIFY_ASCONF_SUCCESS 12
-#define SCTP_NOTIFY_ASCONF_FAILED 13
-#define SCTP_NOTIFY_PEER_SHUTDOWN 14
-#define SCTP_NOTIFY_ASCONF_ADD_IP 15
-#define SCTP_NOTIFY_ASCONF_DELETE_IP 16
-#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 17
-#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 18
-#define SCTP_NOTIFY_INTERFACE_CONFIRMED 20
-#define SCTP_NOTIFY_STR_RESET_RECV 21
-#define SCTP_NOTIFY_STR_RESET_SEND 22
-#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 23
-#define SCTP_NOTIFY_STR_RESET_FAILED_IN 24
-#define SCTP_NOTIFY_AUTH_NEW_KEY 25
-#define SCTP_NOTIFY_AUTH_FREE_KEY 26
-#define SCTP_NOTIFY_SPECIAL_SP_FAIL 27
-#define SCTP_NOTIFY_NO_PEER_AUTH 28
-#define SCTP_NOTIFY_SENDER_DRY 29
-#define SCTP_NOTIFY_STR_RESET_ADD_OK 30
-#define SCTP_NOTIFY_STR_RESET_ADD_FAIL 31
-#define SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK 32
-#define SCTP_NOTIFY_MAX 32
-
+#define SCTP_NOTIFY_PEER_SHUTDOWN 11
+#define SCTP_NOTIFY_ASCONF_ADD_IP 12
+#define SCTP_NOTIFY_ASCONF_DELETE_IP 13
+#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 14
+#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 15
+#define SCTP_NOTIFY_INTERFACE_CONFIRMED 16
+#define SCTP_NOTIFY_STR_RESET_RECV 17
+#define SCTP_NOTIFY_STR_RESET_SEND 18
+#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 19
+#define SCTP_NOTIFY_STR_RESET_FAILED_IN 20
+#define SCTP_NOTIFY_STR_RESET_DENIED_OUT 21
+#define SCTP_NOTIFY_STR_RESET_DENIED_IN 22
+#define SCTP_NOTIFY_AUTH_NEW_KEY 23
+#define SCTP_NOTIFY_AUTH_FREE_KEY 24
+#define SCTP_NOTIFY_NO_PEER_AUTH 25
+#define SCTP_NOTIFY_SENDER_DRY 26
+#define SCTP_NOTIFY_REMOTE_ERROR 27
/* This is the value for messages that are NOT completely
* copied down where we will start to split the message.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_crc32.c
--- a/head/sys/netinet/sctp_crc32.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_crc32.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,11 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_crc32.c,v 1.12 2005/03/06 16:04:17 itojun Exp $ */
-
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_crc32.c 235828 2012-05-23 11:26:28Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -124,7 +121,9 @@
sctp_delayed_cksum(struct mbuf *m, uint32_t offset)
{
#if defined(SCTP_WITH_NO_CSUM)
+#ifdef INVARIANTS
panic("sctp_delayed_cksum() called when using no SCTP CRC.");
+#endif
#else
uint32_t checksum;
@@ -134,7 +133,7 @@
offset += offsetof(struct sctphdr, checksum);
if (offset + sizeof(uint32_t) > (uint32_t) (m->m_len)) {
- printf("sctp_delayed_cksum(): m->len: %d, off: %d.\n",
+ SCTP_PRINTF("sctp_delayed_cksum(): m->len: %d, off: %d.\n",
(uint32_t) m->m_len, offset);
/*
* XXX this shouldn't happen, but if it does, the correct
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_crc32.h
--- a/head/sys/netinet/sctp_crc32.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_crc32.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_crc32.h,v 1.5 2004/08/17 04:06:16 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_crc32.h 235828 2012-05-23 11:26:28Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_crc32.h 228653 2011-12-17 19:21:40Z tuexen $");
-
-#ifndef __crc32c_h__
-#define __crc32c_h__
+#ifndef _NETINET_SCTP_CRC32_H_
+#define _NETINET_SCTP_CRC32_H_
#if defined(_KERNEL)
#if !defined(SCTP_WITH_NO_CSUM)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_dtrace_declare.h
--- a/head/sys/netinet/sctp_dtrace_declare.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_dtrace_declare.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,6 +1,6 @@
/*-
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -28,9 +28,13 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_dtrace_declare.h 228653 2011-12-17 19:21:40Z tuexen $");
-#ifndef __sctp_dtrace_declare_h__
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_dtrace_declare.h 235828 2012-05-23 11:26:28Z tuexen $");
+
+#ifndef _NETINET_SCTP_DTRACE_DECLARE_H_
+#define _NETINET_SCTP_DTRACE_DECLARE_H_
+
#include "opt_kdtrace.h"
#include <sys/kernel.h>
#include <sys/sdt.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_dtrace_define.h
--- a/head/sys/netinet/sctp_dtrace_define.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_dtrace_define.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,6 +1,6 @@
/*-
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -28,9 +28,13 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_dtrace_define.h 228653 2011-12-17 19:21:40Z tuexen $");
-#ifndef __sctp_dtrace_define_h__
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_dtrace_define.h 235828 2012-05-23 11:26:28Z tuexen $");
+
+#ifndef _NETINET_SCTP_DTRACE_DEFINE_H_
+#define _NETINET_SCTP_DTRACE_DEFINE_H_
+
#include "opt_kdtrace.h"
#include <sys/kernel.h>
#include <sys/sdt.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_header.h
--- a/head/sys/netinet/sctp_header.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_header.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_header.h,v 1.14 2005/03/06 16:04:17 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_header.h 235828 2012-05-23 11:26:28Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_header.h 233660 2012-03-29 13:36:53Z rrs $");
-
-#ifndef __sctp_header_h__
-#define __sctp_header_h__
+#ifndef _NETINET_SCTP_HEADER_H_
+#define _NETINET_SCTP_HEADER_H_
#include <sys/time.h>
#include <netinet/sctp.h>
@@ -499,12 +497,13 @@
uint16_t reserved;
} SCTP_PACKED;
-#define SCTP_STREAM_RESET_NOTHING 0x00000000 /* Nothing for me to do */
-#define SCTP_STREAM_RESET_PERFORMED 0x00000001 /* Did it */
-#define SCTP_STREAM_RESET_REJECT 0x00000002 /* refused to do it */
-#define SCTP_STREAM_RESET_ERROR_STR 0x00000003 /* bad Stream no */
-#define SCTP_STREAM_RESET_TRY_LATER 0x00000004 /* collision, try again */
-#define SCTP_STREAM_RESET_BAD_SEQNO 0x00000005 /* bad str-reset seq no */
+#define SCTP_STREAM_RESET_RESULT_NOTHING_TO_DO 0x00000000 /* XXX: unused */
+#define SCTP_STREAM_RESET_RESULT_PERFORMED 0x00000001
+#define SCTP_STREAM_RESET_RESULT_DENIED 0x00000002
+#define SCTP_STREAM_RESET_RESULT_ERR__WRONG_SSN 0x00000003 /* XXX: unused */
+#define SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS 0x00000004
+#define SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO 0x00000005
+#define SCTP_STREAM_RESET_RESULT_IN_PROGRESS 0x00000006 /* XXX: unused */
/*
* convience structures, note that if you are making a request for specific
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_indata.c
--- a/head/sys/netinet/sctp_indata.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_indata.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_indata.c,v 1.36 2005/03/06 16:04:17 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_indata.c 234459 2012-04-19 12:43:19Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_indata.c 237715 2012-06-28 16:01:08Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -328,7 +326,7 @@
}
SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn);
if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
- printf("gap:%x tsn:%x\n", gap, tsn);
+ SCTP_PRINTF("gap:%x tsn:%x\n", gap, tsn);
sctp_print_mapping_array(asoc);
#ifdef INVARIANTS
panic("Things are really messed up now!!");
@@ -607,9 +605,7 @@
*ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
@@ -892,8 +888,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if (asoc->fragmented_delivery_inprogress &&
(chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
@@ -924,8 +919,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if (asoc->fragmented_delivery_inprogress) {
/*
@@ -961,8 +955,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
SCTP_DATA_UNORDERED &&
@@ -995,8 +988,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
}
}
@@ -1090,8 +1082,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1127,9 +1118,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1166,9 +1155,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1202,9 +1189,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1247,9 +1232,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1289,9 +1272,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1328,9 +1309,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1367,9 +1346,7 @@
*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1531,7 +1508,7 @@
struct mbuf *op_err;
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
- sctp_abort_an_association(stcb->sctp_ep, stcb, 0, op_err, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
}
@@ -1552,7 +1529,7 @@
*/
if (stcb->sctp_socket->so_rcv.sb_cc) {
/* some to read, wake-up */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -1568,7 +1545,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1678,8 +1655,7 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
}
@@ -1942,9 +1918,7 @@
*ippp = ((strmno << 16) | strmseq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
} else {
@@ -1980,9 +1954,7 @@
*ippp = ((strmno << 16) | strmseq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
}
@@ -2027,9 +1999,7 @@
*ippp = ((strmno << 16) | strmseq);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
- sctp_abort_an_association(stcb->sctp_ep,
- stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
-
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
}
@@ -2308,7 +2278,7 @@
#ifdef INVARIANTS
for (i = 0; i < asoc->mapping_array_size; i++) {
if ((asoc->mapping_array[i]) || (asoc->nr_mapping_array[i])) {
- printf("Error Mapping array's not clean at clear\n");
+ SCTP_PRINTF("Error Mapping array's not clean at clear\n");
sctp_print_mapping_array(asoc);
}
}
@@ -2330,7 +2300,7 @@
#ifdef INVARIANTS
panic("impossible slide");
#else
- printf("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n",
+ SCTP_PRINTF("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n",
lgap, slide_end, slide_from, at);
return;
#endif
@@ -2339,7 +2309,7 @@
#ifdef INVARIANTS
panic("would overrun buffer");
#else
- printf("Gak, would have overrun map end:%d slide_end:%d\n",
+ SCTP_PRINTF("Gak, would have overrun map end:%d slide_end:%d\n",
asoc->mapping_array_size, slide_end);
slide_end = asoc->mapping_array_size;
#endif
@@ -2546,8 +2516,11 @@
int
sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
- struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct sctp_nets *net, uint32_t * high_tsn)
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t * high_tsn,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
struct sctp_data_chunk *ch, chunk_buf;
struct sctp_association *asoc;
@@ -2654,8 +2627,10 @@
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
- sctp_abort_association(inp, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
return (2);
}
#ifdef SCTP_AUDITING_ENABLED
@@ -2719,7 +2694,12 @@
struct mbuf *op_err;
op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, 0, net->port);
+ sctp_abort_association(inp, stcb,
+ m, iphlen,
+ src, dst,
+ sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
return (2);
}
break;
@@ -2784,7 +2764,7 @@
/*
* we need to report rwnd overrun drops.
*/
- sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+ sctp_send_packet_dropped(stcb, net, *mm, length, iphlen, 0);
}
if (num_chunks) {
/*
@@ -3222,8 +3202,7 @@
if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
/* Yes so drop it */
if (tp1->data != NULL) {
- (void)sctp_release_pr_sctp_chunk(stcb, tp1,
- (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1,
SCTP_SO_NOT_LOCKED);
}
continue;
@@ -3480,8 +3459,7 @@
if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) {
/* Yes, so drop it */
if (tp1->data != NULL) {
- (void)sctp_release_pr_sctp_chunk(stcb, tp1,
- (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1,
SCTP_SO_NOT_LOCKED);
}
/* Make sure to flag we had a FR */
@@ -3489,7 +3467,10 @@
continue;
}
}
- /* printf("OK, we are now ready to FR this guy\n"); */
+ /*
+ * SCTP_PRINTF("OK, we are now ready to FR this
+ * guy\n");
+ */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
sctp_log_fr(tp1->rec.data.TSN_seq, tp1->snd_count,
0, SCTP_FR_MARKED);
@@ -3557,7 +3538,7 @@
tot_retrans++;
/* mark the sending seq for possible subsequent FR's */
/*
- * printf("Marking TSN for FR new value %x\n",
+ * SCTP_PRINTF("Marking TSN for FR new value %x\n",
* (uint32_t)tpi->rec.data.TSN_seq);
*/
if (TAILQ_EMPTY(&asoc->send_queue)) {
@@ -3657,8 +3638,7 @@
/* Yes so drop it */
if (tp1->data) {
(void)sctp_release_pr_sctp_chunk(stcb, tp1,
- (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
- SCTP_SO_NOT_LOCKED);
+ 1, SCTP_SO_NOT_LOCKED);
}
} else {
/*
@@ -3709,11 +3689,10 @@
TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
if (chk->sent < SCTP_DATAGRAM_RESEND) {
- printf("Chk TSN:%u size:%d inflight cnt:%d\n",
+ SCTP_PRINTF("Chk TSN:%u size:%d inflight cnt:%d\n",
chk->rec.data.TSN_seq,
chk->send_size,
- chk->snd_count
- );
+ chk->snd_count);
inflight++;
} else if (chk->sent == SCTP_DATAGRAM_RESEND) {
resend++;
@@ -3730,7 +3709,7 @@
#ifdef INVARIANTS
panic("Flight size-express incorrect? \n");
#else
- printf("asoc->total_flight:%d cnt:%d\n",
+ SCTP_PRINTF("asoc->total_flight:%d cnt:%d\n",
entry_flight, entry_cnt);
SCTP_PRINTF("Flight size-express incorrect F:%d I:%d R:%d Ab:%d ACK:%d\n",
@@ -3876,7 +3855,7 @@
*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
return;
#endif
}
@@ -3895,7 +3874,7 @@
TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) {
if (SCTP_TSN_GE(cumack, tp1->rec.data.TSN_seq)) {
if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
- printf("Warning, an unsent is now acked?\n");
+ SCTP_PRINTF("Warning, an unsent is now acked?\n");
}
if (tp1->sent < SCTP_DATAGRAM_ACKED) {
/*
@@ -4005,7 +3984,7 @@
}
/* sa_ignore NO_NULL_CHK */
if (stcb->sctp_socket) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -4014,7 +3993,7 @@
/* sa_ignore NO_NULL_CHK */
sctp_wakeup_log(stcb, 1, SCTP_WAKESND_FROM_SACK);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4028,7 +4007,7 @@
}
#endif
sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else {
@@ -4050,7 +4029,7 @@
/* addr came good */
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
- SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ 0, (void *)net, SCTP_SO_NOT_LOCKED);
}
if (net == stcb->asoc.primary_destination) {
if (stcb->asoc.alternate) {
@@ -4238,7 +4217,7 @@
*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
- sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
} else {
struct sctp_nets *netp;
@@ -4437,10 +4416,10 @@
* no way, we have not even sent this TSN out yet.
* Peer is hopelessly messed up with us.
*/
- printf("NEW cum_ack:%x send_s:%x is smaller or equal\n",
+ SCTP_PRINTF("NEW cum_ack:%x send_s:%x is smaller or equal\n",
cum_ack, send_s);
if (tp1) {
- printf("Got send_s from tsn:%x + 1 of tp1:%p\n",
+ SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1:%p\n",
tp1->rec.data.TSN_seq, tp1);
}
hopeless_peer:
@@ -4461,7 +4440,7 @@
*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
return;
}
}
@@ -4681,10 +4660,8 @@
* peer is either confused or we are under
* attack. We must abort.
*/
- printf("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n",
- biggest_tsn_acked,
- send_s);
-
+ SCTP_PRINTF("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n",
+ biggest_tsn_acked, send_s);
goto hopeless_peer;
}
}
@@ -4719,7 +4696,7 @@
}
if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
/* no more sent on list */
- printf("Warning, tp1->sent == %d and its now acked?\n",
+ SCTP_PRINTF("Warning, tp1->sent == %d and its now acked?\n",
tp1->sent);
}
TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
@@ -4759,7 +4736,7 @@
}
/* sa_ignore NO_NULL_CHK */
if ((wake_him) && (stcb->sctp_socket)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -4767,7 +4744,7 @@
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
sctp_wakeup_log(stcb, wake_him, SCTP_WAKESND_FROM_SACK);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4781,7 +4758,7 @@
}
#endif
sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else {
@@ -4859,7 +4836,7 @@
/* addr came good */
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
- SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ 0, (void *)net, SCTP_SO_NOT_LOCKED);
}
if (net == stcb->asoc.primary_destination) {
if (stcb->asoc.alternate) {
@@ -4966,7 +4943,7 @@
*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
- sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
return;
} else {
struct sctp_nets *netp;
@@ -5421,8 +5398,7 @@
*ippp = new_cum_tsn;
}
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
return;
}
SCTP_STAT_INCR(sctps_fwdtsn_map_over);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_indata.h
--- a/head/sys/netinet/sctp_indata.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_indata.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_indata.h,v 1.9 2005/03/06 16:04:17 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_indata.h 237715 2012-06-28 16:01:08Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_indata.h 228653 2011-12-17 19:21:40Z tuexen $");
-
-#ifndef __sctp_indata_h__
-#define __sctp_indata_h__
+#ifndef _NETINET_SCTP_INDATA_H_
+#define _NETINET_SCTP_INDATA_H_
#if defined(_KERNEL) || defined(__Userspace__)
@@ -113,9 +111,13 @@
sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *, int *);
int
-sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+sctp_process_data(struct mbuf **, int, int *, int,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *,
struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint32_t *);
+ struct sctp_nets *, uint32_t *,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
void sctp_slide_mapping_arrays(struct sctp_tcb *stcb);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_input.c
--- a/head/sys/netinet/sctp_input.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_input.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_input.c,v 1.27 2005/03/06 16:04:17 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_input.c 234461 2012-04-19 13:11:17Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_input.c 238454 2012-07-14 19:44:39Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -82,9 +80,12 @@
/* INIT handler */
static void
-sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
- struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- int *abort_no_unlock, uint32_t vrf_id, uint16_t port)
+sctp_handle_init(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
+ struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, int *abort_no_unlock,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
struct sctp_init *init;
struct mbuf *op_err;
@@ -97,7 +98,8 @@
/* validate length */
if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -108,7 +110,8 @@
if (init->initiate_tag == 0) {
/* protocol error... send abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -117,7 +120,8 @@
if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
/* invalid parameter... send abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -126,7 +130,8 @@
if (init->num_inbound_streams == 0) {
/* protocol error... send abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -135,7 +140,8 @@
if (init->num_outbound_streams == 0) {
/* protocol error... send abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -144,7 +150,9 @@
if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
offset + ntohs(cp->ch.chunk_length))) {
/* auth parameter(s) error... send abort */
- sctp_abort_association(inp, stcb, m, iphlen, sh, NULL, vrf_id, port);
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
goto outnow;
@@ -171,7 +179,9 @@
* state :-)
*/
if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) {
- sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id, port);
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
}
goto outnow;
}
@@ -182,7 +192,10 @@
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
} else {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
- sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp, vrf_id, port,
+ sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, src, dst,
+ sh, cp,
+ use_mflowid, mflowid,
+ vrf_id, port,
((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
}
outnow:
@@ -300,8 +313,8 @@
asoc->send_queue_cnt--;
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_NOT_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
+ 0, chk, SCTP_SO_NOT_LOCKED);
if (chk->data) {
sctp_m_freem(chk->data);
chk->data = NULL;
@@ -318,8 +331,7 @@
TAILQ_REMOVE(&outs->outqueue, sp, next);
asoc->stream_queue_cnt--;
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
- stcb, SCTP_NOTIFY_DATAGRAM_UNSENT,
- sp, SCTP_SO_NOT_LOCKED);
+ stcb, 0, sp, SCTP_SO_NOT_LOCKED);
if (sp->data) {
sctp_m_freem(sp->data);
sp->data = NULL;
@@ -410,8 +422,11 @@
*/
static int
sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
- struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
- struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
+ struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id)
{
struct sctp_association *asoc;
struct mbuf *op_err;
@@ -428,7 +443,7 @@
&abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly);
if (abort_flag) {
/* Send an abort and notify peer */
- sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_CAUSE_PROTOCOL_VIOLATION, op_err, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_no_unlock = 1;
return (-1);
}
@@ -442,14 +457,16 @@
initack_limit = offset + ntohs(cp->ch.chunk_length);
/* load all addresses */
if ((retval = sctp_load_addresses_from_init(stcb, m,
- (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
- NULL))) {
+ (offset + sizeof(struct sctp_init_chunk)), initack_limit,
+ src, dst, NULL))) {
/* Huh, we should abort */
SCTPDBG(SCTP_DEBUG_INPUT1,
"Load addresses from INIT causes an abort %d\n",
retval);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- NULL, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, NULL,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
@@ -523,7 +540,9 @@
mp->resv = 0;
}
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
- sh, op_err, vrf_id, net->port);
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
}
return (retval);
@@ -632,7 +651,7 @@
if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) {
r_net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
- SCTP_HEARTBEAT_SUCCESS, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
}
if (r_net->dest_state & SCTP_ADDR_PF) {
r_net->dest_state &= ~SCTP_ADDR_PF;
@@ -739,61 +758,51 @@
static void
-sctp_handle_abort(struct sctp_abort_chunk *cp,
+sctp_handle_abort(struct sctp_abort_chunk *abort,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
uint16_t len;
+ uint16_t error;
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n");
if (stcb == NULL)
return;
- len = ntohs(cp->ch.chunk_length);
+ len = ntohs(abort->ch.chunk_length);
if (len > sizeof(struct sctp_chunkhdr)) {
/*
* Need to check the cause codes for our two magic nat
* aborts which don't kill the assoc necessarily.
*/
- struct sctp_abort_chunk *cpnext;
struct sctp_missing_nat_state *natc;
- uint16_t cause;
-
- cpnext = cp;
- cpnext++;
- natc = (struct sctp_missing_nat_state *)cpnext;
- cause = ntohs(natc->cause);
- if (cause == SCTP_CAUSE_NAT_COLLIDING_STATE) {
+
+ natc = (struct sctp_missing_nat_state *)(abort + 1);
+ error = ntohs(natc->cause);
+ if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) {
SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
- cp->ch.chunk_flags);
+ abort->ch.chunk_flags);
if (sctp_handle_nat_colliding_state(stcb)) {
return;
}
- } else if (cause == SCTP_CAUSE_NAT_MISSING_STATE) {
+ } else if (error == SCTP_CAUSE_NAT_MISSING_STATE) {
SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n",
- cp->ch.chunk_flags);
+ abort->ch.chunk_flags);
if (sctp_handle_nat_missing_state(stcb, net)) {
return;
}
}
+ } else {
+ error = 0;
}
/* stop any receive timers */
sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
/* notify user of the abort and clean up... */
- sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
/* free the tcb */
-#if defined(SCTP_PANIC_ON_ABORT)
- printf("stcb:%p state:%d rport:%d net:%p\n",
- stcb, stcb->asoc.state, stcb->rport, net);
- if (!(stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
- panic("Received an ABORT");
- } else {
- printf("No panic its in state %x closed\n", stcb->asoc.state);
- }
-#endif
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
@@ -802,7 +811,7 @@
#ifdef SCTP_ASOCLOG_OF_TSNS
sctp_print_out_track_log(stcb);
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -813,7 +822,7 @@
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
@@ -855,7 +864,7 @@
struct sctp_association *asoc;
int some_on_streamwheel;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -887,7 +896,7 @@
asoc->control_pdapi->pdapi_aborted = 1;
asoc->control_pdapi = NULL;
SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -901,7 +910,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -961,7 +970,7 @@
{
struct sctp_association *asoc;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -995,7 +1004,7 @@
asoc->control_pdapi->pdapi_aborted = 1;
asoc->control_pdapi = NULL;
SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -1008,7 +1017,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1016,7 +1025,7 @@
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
}
/* stop the timer */
sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
@@ -1032,7 +1041,7 @@
}
SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
/* free the TCB but first save off the ep */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -1041,7 +1050,7 @@
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1122,12 +1131,12 @@
{
int chklen;
struct sctp_paramhdr *phdr;
- uint16_t error_type;
+ uint16_t error, error_type;
uint16_t error_len;
struct sctp_association *asoc;
int adjust;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1137,6 +1146,7 @@
phdr = (struct sctp_paramhdr *)((caddr_t)ch +
sizeof(struct sctp_chunkhdr));
chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr);
+ error = 0;
while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) {
/* Process an Error Cause */
error_type = ntohs(phdr->param_type);
@@ -1147,6 +1157,10 @@
chklen, error_len);
return (0);
}
+ if (error == 0) {
+ /* report the first error cause */
+ error = error_type;
+ }
switch (error_type) {
case SCTP_CAUSE_INVALID_STREAM:
case SCTP_CAUSE_MISSING_PARAM:
@@ -1183,9 +1197,9 @@
asoc->stale_cookie_count++;
if (asoc->stale_cookie_count >
asoc->max_init_times) {
- sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
/* now free the asoc */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1195,7 +1209,7 @@
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
return (-1);
@@ -1263,13 +1277,17 @@
chklen -= adjust;
phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust);
}
+ sctp_ulp_notify(SCTP_NOTIFY_REMOTE_ERROR, stcb, error, ch, SCTP_SO_NOT_LOCKED);
return (0);
}
static int
sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
- struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
- struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
+ struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id)
{
struct sctp_init_ack *init_ack;
struct mbuf *op_err;
@@ -1285,8 +1303,10 @@
if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
/* Invalid length */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
@@ -1295,32 +1315,40 @@
if (init_ack->initiate_tag == 0) {
/* protocol error... send an abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
/* protocol error... send an abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
if (init_ack->num_inbound_streams == 0) {
/* protocol error... send an abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
if (init_ack->num_outbound_streams == 0) {
/* protocol error... send an abort */
op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err, 0, net->port);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
@@ -1342,8 +1370,10 @@
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
}
- if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
- net, abort_no_unlock, vrf_id) < 0) {
+ if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb,
+ net, abort_no_unlock,
+ use_mflowid, mflowid,
+ vrf_id) < 0) {
/* error in parsing parameters */
return (-1);
}
@@ -1394,10 +1424,12 @@
static struct sctp_tcb *
sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port);
@@ -1409,10 +1441,13 @@
*/
static struct sctp_tcb *
sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
- uint32_t vrf_id, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint16_t port)
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
struct sctp_init_chunk *init_cp, init_buf;
@@ -1449,7 +1484,8 @@
ph = mtod(op_err, struct sctp_paramhdr *);
ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
ph->param_length = htons(sizeof(struct sctp_paramhdr));
- sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+ use_mflowid, mflowid,
vrf_id, net->port);
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 2;
@@ -1551,7 +1587,7 @@
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
(inp->sctp_socket->so_qlimit == 0)
) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1563,7 +1599,7 @@
*/
stcb->sctp_ep->sctp_flags |=
SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1576,7 +1612,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1613,7 +1649,7 @@
*/
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 4;
return (NULL);
@@ -1674,7 +1710,9 @@
ph = mtod(op_err, struct sctp_paramhdr *);
ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE);
ph->param_length = htons(sizeof(struct sctp_paramhdr));
- sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port);
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
return (NULL);
}
if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
@@ -1755,7 +1793,7 @@
}
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 10;
return (NULL);
@@ -1767,13 +1805,13 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
(inp->sctp_socket->so_qlimit == 0)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
stcb->sctp_ep->sctp_flags |=
SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1786,7 +1824,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1836,9 +1874,11 @@
* cookie_new code since we are allowing a duplicate
* association. I hope this works...
*/
- return (sctp_process_cookie_new(m, iphlen, offset, sh, cookie, cookie_len,
+ return (sctp_process_cookie_new(m, iphlen, offset, src, dst,
+ sh, cookie, cookie_len,
inp, netp, init_src, notification,
auth_skipped, auth_offset, auth_len,
+ use_mflowid, mflowid,
vrf_id, port));
}
/*
@@ -1899,7 +1939,7 @@
/* send up all the data */
SCTP_TCB_SEND_LOCK(stcb);
- sctp_report_all_outbound(stcb, 1, SCTP_SO_NOT_LOCKED);
+ sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_NOT_LOCKED);
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
stcb->asoc.strmout[i].stream_no = i;
stcb->asoc.strmout[i].next_sequence_sent = 0;
@@ -1940,7 +1980,7 @@
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 14;
@@ -1969,12 +2009,14 @@
* cookie-echo chunk length: length of the cookie chunk to: where the init
* was from returns a new TCB
*/
-struct sctp_tcb *
+static struct sctp_tcb *
sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_tcb *stcb;
@@ -1996,7 +2038,7 @@
struct sockaddr_in6 *sin6;
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -2069,7 +2111,9 @@
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
- sh, op_err, vrf_id, port);
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
return (NULL);
}
/* get the correct sctp_nets */
@@ -2095,15 +2139,17 @@
atomic_add_int(&stcb->asoc.refcnt, 1);
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
- sh, op_err, vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2128,13 +2174,13 @@
retval = 0;
if (retval < 0) {
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2142,16 +2188,16 @@
}
/* load all addresses */
if (sctp_load_addresses_from_init(stcb, m,
- init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
- init_src)) {
+ init_offset + sizeof(struct sctp_init_chunk), initack_offset,
+ src, dst, init_src)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2174,13 +2220,13 @@
SCTPDBG(SCTP_DEBUG_AUTH1,
"COOKIE-ECHO: AUTH failed\n");
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2237,13 +2283,13 @@
#endif
default:
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2264,7 +2310,7 @@
* a bit of protection is worth having..
*/
stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -2276,7 +2322,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
@@ -2339,10 +2385,13 @@
*/
static struct mbuf *
sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- struct sctp_tcb **locked_tcb, uint32_t vrf_id, uint16_t port)
+ struct sctp_tcb **locked_tcb,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
struct sctp_state_cookie *cookie;
struct sctp_tcb *l_stcb = *stcb;
@@ -2353,13 +2402,10 @@
uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
uint8_t *sig;
uint8_t cookie_ok = 0;
- unsigned int size_of_pkt, sig_offset, cookie_offset;
+ unsigned int sig_offset, cookie_offset;
unsigned int cookie_len;
struct timeval now;
struct timeval time_expires;
- struct sockaddr_storage dest_store;
- struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
- struct ip *iph;
int notification = 0;
struct sctp_nets *netl;
int had_a_existing_tcb = 0;
@@ -2380,47 +2426,6 @@
if (inp_p == NULL) {
return (NULL);
}
- /* First get the destination address setup too. */
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *lsin;
-
- lsin = (struct sockaddr_in *)(localep_sa);
- memset(lsin, 0, sizeof(*lsin));
- lsin->sin_family = AF_INET;
- lsin->sin_len = sizeof(*lsin);
- lsin->sin_port = sh->dest_port;
- lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
- size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *lsin6;
-
- lsin6 = (struct sockaddr_in6 *)(localep_sa);
- memset(lsin6, 0, sizeof(*lsin6));
- lsin6->sin6_family = AF_INET6;
- lsin6->sin6_len = sizeof(struct sockaddr_in6);
- ip6 = mtod(m, struct ip6_hdr *);
- lsin6->sin6_port = sh->dest_port;
- lsin6->sin6_addr = ip6->ip6_dst;
- size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
- break;
- }
-#endif
- default:
- return (NULL);
- }
-
cookie = &cp->cookie;
cookie_offset = offset + sizeof(struct sctp_chunkhdr);
cookie_len = ntohs(cp->ch.chunk_length);
@@ -2437,11 +2442,10 @@
*/
return (NULL);
}
- if (cookie_len > size_of_pkt ||
- cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+ if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
sizeof(struct sctp_init_chunk) +
sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
- /* cookie too long! or too small */
+ /* cookie too small */
return (NULL);
}
/*
@@ -2449,11 +2453,6 @@
* calculated in the sctp_hmac_m() call).
*/
sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
- if (sig_offset > size_of_pkt) {
- /* packet not correct size! */
- /* XXX this may already be accounted for earlier... */
- return (NULL);
- }
m_sig = m_split(m, sig_offset, M_DONTWAIT);
if (m_sig == NULL) {
/* out of memory or ?? */
@@ -2577,7 +2576,8 @@
if (tim == 0)
tim = now.tv_usec - cookie->time_entered.tv_usec;
scm->time_usec = htonl(tim);
- sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+ use_mflowid, mflowid,
vrf_id, port);
return (NULL);
}
@@ -2620,7 +2620,7 @@
}
if ((*stcb == NULL) && to) {
/* Yep, lets check */
- *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL);
if (*stcb == NULL) {
/*
* We should have only got back the same inp. If we
@@ -2663,23 +2663,29 @@
cookie_len -= SCTP_SIGNATURE_SIZE;
if (*stcb == NULL) {
/* this is the "normal" case... get a new TCB */
- *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
- cookie_len, *inp_p, netp, to, ¬ification,
- auth_skipped, auth_offset, auth_len, vrf_id, port);
+ *stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh,
+ cookie, cookie_len, *inp_p,
+ netp, to, ¬ification,
+ auth_skipped, auth_offset, auth_len,
+ use_mflowid, mflowid,
+ vrf_id, port);
} else {
/* this is abnormal... cookie-echo on existing TCB */
had_a_existing_tcb = 1;
- *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+ *stcb = sctp_process_cookie_existing(m, iphlen, offset,
+ src, dst, sh,
cookie, cookie_len, *inp_p, *stcb, netp, to,
- ¬ification, vrf_id, auth_skipped, auth_offset, auth_len, port);
+ ¬ification, auth_skipped, auth_offset, auth_len,
+ use_mflowid, mflowid,
+ vrf_id, port);
}
if (*stcb == NULL) {
/* still no TCB... must be bad cookie-echo */
return (NULL);
}
- if ((*netp != NULL) && (m->m_flags & M_FLOWID)) {
- (*netp)->flowid = m->m_pkthdr.flowid;
+ if ((*netp != NULL) && (use_mflowid != 0)) {
+ (*netp)->flowid = mflowid;
#ifdef INVARIANTS
(*netp)->flowidset = 1;
#endif
@@ -2744,7 +2750,7 @@
if (so == NULL) {
struct mbuf *op_err;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *pcb_so;
#endif
@@ -2752,8 +2758,10 @@
SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
sctp_abort_association(*inp_p, NULL, m, iphlen,
- sh, op_err, vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
pcb_so = SCTP_INP_SO(*inp_p);
atomic_add_int(&(*stcb)->asoc.refcnt, 1);
SCTP_TCB_UNLOCK((*stcb));
@@ -2762,7 +2770,7 @@
atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(pcb_so, 1);
#endif
return (NULL);
@@ -2852,13 +2860,13 @@
* Pull it from the incomplete queue and wake the
* guy
*/
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&(*stcb)->asoc.refcnt, 1);
SCTP_TCB_UNLOCK((*stcb));
SCTP_SOCKET_LOCK(so, 1);
#endif
soisconnected(so);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_LOCK((*stcb));
atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
SCTP_SOCKET_UNLOCK(so, 1);
@@ -2866,14 +2874,12 @@
return (m);
}
}
- if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
- if (notification) {
- sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
- }
- if (send_int_conf) {
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
- (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
- }
+ if (notification) {
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ if (send_int_conf) {
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
}
return (m);
}
@@ -2916,12 +2922,12 @@
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -2932,7 +2938,7 @@
if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) {
soisconnected(stcb->sctp_socket);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3160,7 +3166,7 @@
{
struct sctp_association *asoc;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3186,7 +3192,7 @@
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
}
}
/* stop the timer */
@@ -3195,7 +3201,7 @@
/* free the TCB */
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_shutdown_complete: calls free-asoc\n");
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3204,7 +3210,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
return;
@@ -3599,9 +3605,11 @@
asoc->stream_reset_out_is_outstanding = 0;
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
- if (action == SCTP_STREAM_RESET_PERFORMED) {
+ if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* do it */
sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
} else {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
}
@@ -3610,7 +3618,10 @@
number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
- if (action != SCTP_STREAM_RESET_PERFORMED) {
+ if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb,
+ number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb,
number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
}
@@ -3626,20 +3637,26 @@
stcb->asoc.strm_pending_add_size = 0;
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
- if (action == SCTP_STREAM_RESET_PERFORMED) {
+ if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* Put the new streams into effect */
stcb->asoc.streamoutcnt += num_stream;
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
+ } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
+ sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
+ SCTP_STREAM_CHANGE_DENIED);
} else {
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
- SCTP_STREAM_CHANGED_DENIED);
+ SCTP_STREAM_CHANGE_FAILED);
}
} else if (type == SCTP_STR_RESET_ADD_IN_STREAMS) {
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
- if (action != SCTP_STREAM_RESET_PERFORMED) {
+ if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
- SCTP_STREAM_CHANGED_DENIED);
+ SCTP_STREAM_CHANGE_DENIED);
+ } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
+ sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
+ SCTP_STREAM_CHANGE_FAILED);
}
} else if (type == SCTP_STR_RESET_TSN_REQUEST) {
/**
@@ -3655,7 +3672,7 @@
/* huh ? */
return (0);
}
- if (action == SCTP_STREAM_RESET_PERFORMED) {
+ if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
resp = (struct sctp_stream_reset_response_tsn *)respin;
asoc->stream_reset_outstanding--;
fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
@@ -3682,9 +3699,12 @@
sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), 0);
+ } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
+ sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1),
+ SCTP_ASSOC_RESET_DENIED);
} else {
sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1),
- SCTP_STREAM_RESET_FAILED);
+ SCTP_ASSOC_RESET_FAILED);
}
}
/* get rid of the request and get the request flags */
@@ -3714,10 +3734,12 @@
seq = ntohl(req->request_seq);
if (asoc->str_reset_seq_in == seq) {
- if (trunc) {
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ } else if (trunc) {
/* Can't do it, since they exceeded our buffer size */
- asoc->last_reset_action[1] = asoc->last_reset_action[0];
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
} else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
len = ntohs(req->ph.param_length);
number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
@@ -3725,9 +3747,7 @@
temp = ntohs(req->list_of_streams[i]);
req->list_of_streams[i] = temp;
}
- /* move the reset action back one */
- asoc->last_reset_action[1] = asoc->last_reset_action[0];
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
asoc->str_reset_seq_out,
seq, (asoc->sending_seq - 1));
@@ -3737,8 +3757,7 @@
stcb->asoc.stream_reset_outstanding++;
} else {
/* Can't do it, since we have sent one out */
- asoc->last_reset_action[1] = asoc->last_reset_action[0];
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_TRY_LATER;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
}
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
@@ -3747,7 +3766,7 @@
} else if (asoc->str_reset_seq_in - 2 == seq) {
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
} else {
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
}
@@ -3769,53 +3788,49 @@
seq = ntohl(req->request_seq);
if (asoc->str_reset_seq_in == seq) {
- fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
- fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
- fwdtsn.ch.chunk_flags = 0;
- fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
- sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
- if (abort_flag) {
- return (1);
+ asoc->last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ } else {
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.ch.chunk_flags = 0;
+ fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ asoc->highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
+ asoc->mapping_array_base_tsn = asoc->highest_tsn_inside_map + 1;
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
+ memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size);
+ atomic_add_int(&asoc->sending_seq, 1);
+ /* save off historical data for retrans */
+ asoc->last_sending_seq[1] = asoc->last_sending_seq[0];
+ asoc->last_sending_seq[0] = asoc->sending_seq;
+ asoc->last_base_tsnsent[1] = asoc->last_base_tsnsent[0];
+ asoc->last_base_tsnsent[0] = asoc->mapping_array_base_tsn;
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
+ sctp_notify_stream_reset_tsn(stcb, asoc->sending_seq, (asoc->mapping_array_base_tsn + 1), 0);
}
- stcb->asoc.highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
- sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
- }
- stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
- stcb->asoc.mapping_array_base_tsn = stcb->asoc.highest_tsn_inside_map + 1;
- memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
- stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map;
- memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size);
- atomic_add_int(&stcb->asoc.sending_seq, 1);
- /* save off historical data for retrans */
- stcb->asoc.last_sending_seq[1] = stcb->asoc.last_sending_seq[0];
- stcb->asoc.last_sending_seq[0] = stcb->asoc.sending_seq;
- stcb->asoc.last_base_tsnsent[1] = stcb->asoc.last_base_tsnsent[0];
- stcb->asoc.last_base_tsnsent[0] = stcb->asoc.mapping_array_base_tsn;
-
- sctp_add_stream_reset_result_tsn(chk,
- ntohl(req->request_seq),
- SCTP_STREAM_RESET_PERFORMED,
- stcb->asoc.sending_seq,
- stcb->asoc.mapping_array_base_tsn);
- sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
- sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
- sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), 0);
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
+ asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]);
asoc->str_reset_seq_in++;
} else if (asoc->str_reset_seq_in - 1 == seq) {
sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
- stcb->asoc.last_sending_seq[0],
- stcb->asoc.last_base_tsnsent[0]
- );
+ asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]);
} else if (asoc->str_reset_seq_in - 2 == seq) {
sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1],
- stcb->asoc.last_sending_seq[1],
- stcb->asoc.last_base_tsnsent[1]
- );
+ asoc->last_sending_seq[1], asoc->last_base_tsnsent[1]);
} else {
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
return (0);
}
@@ -3846,12 +3861,14 @@
/* move the reset action back one */
asoc->last_reset_action[1] = asoc->last_reset_action[0];
- if (trunc) {
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ } else if (trunc) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
} else if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) {
/* we can do it now */
sctp_reset_in_stream(stcb, number_entries, req->list_of_streams);
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
} else {
/*
* we must queue it up and thus wait for the TSN's
@@ -3865,8 +3882,8 @@
siz, SCTP_M_STRESET);
if (liste == NULL) {
/* gak out of memory */
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_REJECT);
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
return;
}
liste->tsn = tsn;
@@ -3874,7 +3891,7 @@
memcpy(&liste->req, req,
(sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
}
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
@@ -3891,7 +3908,7 @@
*/
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
} else {
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
}
@@ -3914,12 +3931,14 @@
/* Now what would be the new total? */
if (asoc->str_reset_seq_in == seq) {
num_stream += stcb->asoc.streamincnt;
- if ((num_stream > stcb->asoc.max_inbound_streams) ||
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ } else if ((num_stream > stcb->asoc.max_inbound_streams) ||
(num_stream > 0xffff)) {
/* We must reject it they ask for to many */
denied:
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
} else {
/* Ok, we can do that :-) */
struct sctp_stream_in *oldstrm;
@@ -3955,8 +3974,7 @@
SCTP_FREE(oldstrm, SCTP_M_STRMI);
/* update the size */
stcb->asoc.streamincnt = num_stream;
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
}
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
@@ -3974,7 +3992,7 @@
*/
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
} else {
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
}
@@ -3996,10 +4014,12 @@
num_stream = ntohs(str_add->number_of_streams);
/* Now what would be the new total? */
if (asoc->str_reset_seq_in == seq) {
- if (stcb->asoc.stream_reset_outstanding) {
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ } else if (stcb->asoc.stream_reset_outstanding) {
/* We must reject it we have something pending */
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
} else {
/* Ok, we can do that :-) */
int mychk;
@@ -4007,14 +4027,12 @@
mychk = stcb->asoc.streamoutcnt;
mychk += num_stream;
if (mychk < 0x10000) {
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, 1, num_stream, 0, 1)) {
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
}
} else {
- stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
- stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_REJECT;
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
}
}
sctp_add_stream_reset_result(chk, seq, stcb->asoc.last_reset_action[0]);
@@ -4032,7 +4050,7 @@
*/
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
} else {
- sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
}
@@ -4046,7 +4064,6 @@
int chk_length, param_len, ptype;
struct sctp_paramhdr pstore;
uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
-
uint32_t seq = 0;
int num_req = 0;
int trunc = 0;
@@ -4124,7 +4141,7 @@
seq = ntohl(req_out->response_seq);
if (seq == stcb->asoc.str_reset_seq_out) {
/* implicit ack */
- (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_PERFORMED, NULL);
+ (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_RESULT_PERFORMED, NULL);
}
}
sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc);
@@ -4355,8 +4372,10 @@
#endif
static struct sctp_tcb *
sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
@@ -4379,7 +4398,7 @@
int auth_skipped = 0;
int asconf_cnt = 0;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -4470,7 +4489,9 @@
if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
break;
stcb = sctp_findassociation_ep_asconf(m,
- *offset, sh, &inp, netp, vrf_id);
+ *offset,
+ dst,
+ sh, &inp, netp, vrf_id);
if (stcb != NULL)
break;
asconf_offset += SCTP_SIZE32(asconf_len);
@@ -4512,7 +4533,8 @@
}
if (stcb == NULL) {
/* no association, so it's out of the blue... */
- sctp_handle_ootb(m, iphlen, *offset, sh, inp, NULL,
+ sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp,
+ use_mflowid, mflowid,
vrf_id, port);
*offset = length;
if (locked_tcb) {
@@ -4549,8 +4571,10 @@
if (locked_tcb) {
SCTP_TCB_UNLOCK(locked_tcb);
}
- sctp_handle_ootb(m, iphlen, *offset, sh, inp,
- NULL, vrf_id, port);
+ sctp_handle_ootb(m, iphlen, *offset, src, dst,
+ sh, inp,
+ use_mflowid, mflowid,
+ vrf_id, port);
return (NULL);
}
} else {
@@ -4690,8 +4714,10 @@
/* The INIT chunk must be the only chunk. */
if ((num_chunks > 1) ||
(length - *offset > (int)SCTP_SIZE32(chk_length))) {
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, NULL, vrf_id, port);
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
*offset = length;
return (NULL);
}
@@ -4700,14 +4726,18 @@
struct mbuf *op_err;
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, op_err, vrf_id, port);
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
*offset = length;
return (NULL);
}
- sctp_handle_init(m, iphlen, *offset, sh,
+ sctp_handle_init(m, iphlen, *offset, src, dst, sh,
(struct sctp_init_chunk *)ch, inp,
- stcb, &abort_no_unlock, vrf_id, port);
+ stcb, &abort_no_unlock,
+ use_mflowid, mflowid,
+ vrf_id, port);
*offset = length;
if ((!abort_no_unlock) && (locked_tcb)) {
SCTP_TCB_UNLOCK(locked_tcb);
@@ -4729,7 +4759,7 @@
}
*offset = length;
if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4738,7 +4768,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -4755,8 +4785,13 @@
return (NULL);
}
if ((netp) && (*netp)) {
- ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
- (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, vrf_id);
+ ret = sctp_handle_init_ack(m, iphlen, *offset,
+ src, dst, sh,
+ (struct sctp_init_ack_chunk *)ch,
+ stcb, *netp,
+ &abort_no_unlock,
+ use_mflowid, mflowid,
+ vrf_id);
} else {
ret = -1;
}
@@ -5061,8 +5096,10 @@
struct mbuf *op_err;
op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, op_err, vrf_id, port);
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
}
*offset = length;
return (NULL);
@@ -5087,13 +5124,17 @@
if (netp) {
ret_buf =
sctp_handle_cookie_echo(m, iphlen,
- *offset, sh,
+ *offset,
+ src, dst,
+ sh,
(struct sctp_cookie_echo_chunk *)ch,
&inp, &stcb, netp,
auth_skipped,
auth_offset,
auth_len,
&locked_tcb,
+ use_mflowid,
+ mflowid,
vrf_id,
port);
} else {
@@ -5141,7 +5182,7 @@
if ((stcb) && (stcb->asoc.total_output_queue_size)) {
;
} else if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -5150,7 +5191,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
*offset = length;
@@ -5248,7 +5289,7 @@
__LINE__);
}
stcb->asoc.overall_error_count = 0;
- sctp_handle_asconf(m, *offset,
+ sctp_handle_asconf(m, *offset, src,
(struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
asconf_cnt++;
}
@@ -5304,7 +5345,7 @@
*fwd_tsn_seen = 1;
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
/* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -5313,7 +5354,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
*offset = length;
@@ -5349,7 +5390,7 @@
}
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
/* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -5358,7 +5399,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
*offset = length;
@@ -5544,26 +5585,144 @@
* common input chunk processing (v4 and v6)
*/
void
-sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
- int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
- struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
- uint8_t ecn_bits, uint32_t vrf_id, uint16_t port)
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint8_t compute_crc,
+#endif
+ uint8_t ecn_bits,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
- /*
- * Control chunk processing
- */
uint32_t high_tsn;
int fwd_tsn_seen = 0, data_processed = 0;
struct mbuf *m = *mm;
int un_sent;
int cnt_ctrl_ready = 0;
+ struct sctp_inpcb *inp, *inp_decr = NULL;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net = NULL;
SCTP_STAT_INCR(sctps_recvdatagrams);
#ifdef SCTP_AUDITING_ENABLED
sctp_audit_log(0xE0, 1);
sctp_auditing(0, inp, stcb, net);
#endif
-
+#if !defined(SCTP_WITH_NO_CSUM)
+ if (compute_crc != 0) {
+ uint32_t check, calc_check;
+
+ check = sh->checksum;
+ sh->checksum = 0;
+ calc_check = sctp_calculate_cksum(m, iphlen);
+ sh->checksum = check;
+ if (calc_check != check) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
+ calc_check, check, m, length, iphlen);
+ stcb = sctp_findassociation_addr(m, offset, src, dst,
+ sh, ch, &inp, &net, vrf_id);
+ if ((net != NULL) && (port != 0)) {
+ if (net->port == 0) {
+ sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ }
+ net->port = port;
+ }
+ if ((net != NULL) && (use_mflowid != 0)) {
+ net->flowid = mflowid;
+#ifdef INVARIANTS
+ net->flowidset = 1;
+#endif
+ }
+ if ((inp != NULL) && (stcb != NULL)) {
+ sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+ } else if ((inp != NULL) && (stcb == NULL)) {
+ inp_decr = inp;
+ }
+ SCTP_STAT_INCR(sctps_badsum);
+ SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+ goto out;
+ }
+ }
+#endif
+ /* Destination port of 0 is illegal, based on RFC4960. */
+ if (sh->dest_port == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ stcb = sctp_findassociation_addr(m, offset, src, dst,
+ sh, ch, &inp, &net, vrf_id);
+ if ((net != NULL) && (port != 0)) {
+ if (net->port == 0) {
+ sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ }
+ net->port = port;
+ }
+ if ((net != NULL) && (use_mflowid != 0)) {
+ net->flowid = mflowid;
+#ifdef INVARIANTS
+ net->flowidset = 1;
+#endif
+ }
+ if (inp == NULL) {
+ SCTP_STAT_INCR(sctps_noport);
+ if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
+ goto out;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ sctp_send_shutdown_complete2(src, dst, sh,
+ use_mflowid, mflowid,
+ vrf_id, port);
+ goto out;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+ goto out;
+ }
+ if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
+ if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
+ ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
+ (ch->chunk_type != SCTP_INIT))) {
+ sctp_send_abort(m, iphlen, src, dst,
+ sh, 0, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
+ }
+ }
+ goto out;
+ } else if (stcb == NULL) {
+ inp_decr = inp;
+ }
+#ifdef IPSEC
+ /*-
+ * I very much doubt any of the IPSEC stuff will work but I have no
+ * idea, so I will leave it in place.
+ */
+ if (inp != NULL) {
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+ MODULE_GLOBAL(ipsec4stat).in_polvio++;
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (ipsec6_in_reject(m, &inp->ip_inp.inp)) {
+ MODULE_GLOBAL(ipsec6stat).in_polvio++;
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+#endif
SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n",
m, iphlen, offset, length, stcb);
if (stcb) {
@@ -5582,16 +5741,21 @@
* NOT respond to any packet.. its OOTB.
*/
SCTP_TCB_UNLOCK(stcb);
- sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ stcb = NULL;
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp,
+ use_mflowid, mflowid,
vrf_id, port);
- goto out_now;
+ goto out;
}
}
if (IS_SCTP_CONTROL(ch)) {
/* process the control portion of the SCTP packet */
/* sa_ignore NO_NULL_CHK */
- stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
- inp, stcb, &net, &fwd_tsn_seen, vrf_id, port);
+ stcb = sctp_process_control(m, iphlen, &offset, length,
+ src, dst, sh, ch,
+ inp, stcb, &net, &fwd_tsn_seen,
+ use_mflowid, mflowid,
+ vrf_id, port);
if (stcb) {
/*
* This covers us if the cookie-echo was there and
@@ -5621,20 +5785,19 @@
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
/* "silently" ignore */
SCTP_STAT_INCR(sctps_recvauthmissing);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
}
if (stcb == NULL) {
/* out of the blue DATA chunk */
- sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp,
+ use_mflowid, mflowid,
vrf_id, port);
- goto out_now;
+ goto out;
}
if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
/* v_tag mismatch! */
SCTP_STAT_INCR(sctps_badvtag);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
}
}
@@ -5644,7 +5807,7 @@
* packet while processing control, or we're done with this
* packet (done or skip rest of data), so we drop it...
*/
- goto out_now;
+ goto out;
}
/*
* DATA chunk processing
@@ -5695,10 +5858,10 @@
/*
* We consider OOTB any data sent during asoc setup.
*/
- sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp,
+ use_mflowid, mflowid,
vrf_id, port);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
/* sa_ignore NOTREACHED */
break;
case SCTP_STATE_EMPTY: /* should not happen */
@@ -5706,8 +5869,7 @@
case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */
case SCTP_STATE_SHUTDOWN_ACK_SENT:
default:
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
/* sa_ignore NOTREACHED */
break;
case SCTP_STATE_OPEN:
@@ -5715,14 +5877,18 @@
break;
}
/* plow through the data chunks while length > offset */
- retval = sctp_process_data(mm, iphlen, &offset, length, sh,
- inp, stcb, net, &high_tsn);
+ retval = sctp_process_data(mm, iphlen, &offset, length,
+ src, dst, sh,
+ inp, stcb, net, &high_tsn,
+ use_mflowid, mflowid,
+ vrf_id, port);
if (retval == 2) {
/*
* The association aborted, NO UNLOCK needed since
* the association is destroyed.
*/
- goto out_now;
+ stcb = NULL;
+ goto out;
}
data_processed = 1;
/*
@@ -5779,10 +5945,20 @@
sctp_audit_log(0xE0, 3);
sctp_auditing(2, inp, stcb, net);
#endif
- SCTP_TCB_UNLOCK(stcb);
-out_now:
+out:
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if (inp_decr != NULL) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp_decr);
+ SCTP_INP_DECR_REF(inp_decr);
+ SCTP_INP_WUNLOCK(inp_decr);
+ }
#ifdef INVARIANTS
- sctp_validate_no_locks(inp);
+ if (inp != NULL) {
+ sctp_validate_no_locks(inp);
+ }
#endif
return;
}
@@ -5792,9 +5968,9 @@
sctp_print_mbuf_chain(struct mbuf *m)
{
for (; m; m = SCTP_BUF_NEXT(m)) {
- printf("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m));
+ SCTP_PRINTF("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m));
if (SCTP_BUF_IS_EXTENDED(m))
- printf("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m));
+ SCTP_PRINTF("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m));
}
}
@@ -5808,33 +5984,25 @@
int iphlen;
uint32_t vrf_id = 0;
uint8_t ecn_bits;
+ struct sockaddr_in src, dst;
struct ip *ip;
struct sctphdr *sh;
- struct sctp_inpcb *inp = NULL;
- struct sctp_nets *net;
- struct sctp_tcb *stcb = NULL;
struct sctp_chunkhdr *ch;
- int refcount_up = 0;
- int length, mlen, offset;
+ int length, offset;
#if !defined(SCTP_WITH_NO_CSUM)
- uint32_t check, calc_check;
+ uint8_t compute_crc;
#endif
-
+ uint32_t mflowid;
+ uint8_t use_mflowid;
+
+ iphlen = off;
if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
SCTP_RELEASE_PKT(i_pak);
return;
}
- mlen = SCTP_HEADER_LEN(i_pak);
- iphlen = off;
m = SCTP_HEADER_TO_CHAIN(i_pak);
-
- net = NULL;
- SCTP_STAT_INCR(sctps_recvpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
-
-
#ifdef SCTP_MBUF_LOGGING
/* Log in any input mbufs */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
@@ -5847,208 +6015,85 @@
}
}
#endif
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, mlen);
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(m);
+ }
#endif
- /*
- * Must take out the iphlen, since mlen expects this (only effect lb
- * case)
- */
- mlen -= iphlen;
-
- /*
- * Get IP, SCTP, and first chunk header together in first mbuf.
- */
- ip = mtod(m, struct ip *);
- offset = iphlen + sizeof(*sh) + sizeof(*ch);
- if (SCTP_BUF_LEN(m) < offset) {
- if ((m = m_pullup(m, offset)) == 0) {
- SCTP_STAT_INCR(sctps_hdrops);
- return;
- }
- ip = mtod(m, struct ip *);
- }
- /* validate mbuf chain length with IP payload length */
- if (mlen < (SCTP_GET_IPV4_LENGTH(ip) - iphlen)) {
- SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
- }
- sh = (struct sctphdr *)((caddr_t)ip + iphlen);
- ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
- SCTPDBG(SCTP_DEBUG_INPUT1,
- "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
-
- /* SCTP does not allow broadcasts or multicasts */
- if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
- goto bad;
- }
- if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
- /*
- * We only look at broadcast if its a front state, All
- * others we will not have a tcb for anyway.
- */
- goto bad;
- }
- /* validate SCTP checksum */
SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
"sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
m->m_pkthdr.len,
if_name(m->m_pkthdr.rcvif),
m->m_pkthdr.csum_flags);
+ if (m->m_flags & M_FLOWID) {
+ mflowid = m->m_pkthdr.flowid;
+ use_mflowid = 1;
+ } else {
+ mflowid = 0;
+ use_mflowid = 0;
+ }
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+ /* Get IP, SCTP, and first chunk header together in the first mbuf. */
+ offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ if (SCTP_BUF_LEN(m) < offset) {
+ if ((m = m_pullup(m, offset)) == NULL) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ return;
+ }
+ }
+ ip = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
+ offset -= sizeof(struct sctp_chunkhdr);
+ memset(&src, 0, sizeof(struct sockaddr_in));
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(struct sockaddr_in);
+ src.sin_port = sh->src_port;
+ src.sin_addr = ip->ip_src;
+ memset(&dst, 0, sizeof(struct sockaddr_in));
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(struct sockaddr_in);
+ dst.sin_port = sh->dest_port;
+ dst.sin_addr = ip->ip_dst;
+ length = ip->ip_len + iphlen;
+ /* Validate mbuf chain length with IP payload length. */
+ if (SCTP_HEADER_LEN(m) != length) {
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m));
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ /* SCTP does not allow broadcasts or multicasts */
+ if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr))) {
+ goto out;
+ }
+ if (SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) {
+ goto out;
+ }
+ ecn_bits = ip->ip_tos;
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_recvnocrc);
#else
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
SCTP_STAT_INCR(sctps_recvhwcrc);
- goto sctp_skip_csum_4;
- }
- check = sh->checksum; /* save incoming checksum */
- sh->checksum = 0; /* prepare for calc */
- calc_check = sctp_calculate_cksum(m, iphlen);
- sh->checksum = check;
- SCTP_STAT_INCR(sctps_recvswcrc);
- if (calc_check != check) {
- SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
- calc_check, check, m, mlen, iphlen);
-
- stcb = sctp_findassociation_addr(m,
- offset - sizeof(*ch),
- sh, ch, &inp, &net,
- vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (m->m_flags & M_FLOWID)) {
- net->flowid = m->m_pkthdr.flowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
+ compute_crc = 0;
+ } else {
+ SCTP_STAT_INCR(sctps_recvswcrc);
+ compute_crc = 1;
+ }
#endif
- }
- if ((inp) && (stcb)) {
- sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
- } else if ((inp != NULL) && (stcb == NULL)) {
- refcount_up = 1;
- }
- SCTP_STAT_INCR(sctps_badsum);
- SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
- goto bad;
- }
-sctp_skip_csum_4:
+ sctp_common_input_processing(&m, iphlen, offset, length,
+ (struct sockaddr *)&src,
+ (struct sockaddr *)&dst,
+ sh, ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+ compute_crc,
#endif
- /* destination port of 0 is illegal, based on RFC2960. */
- if (sh->dest_port == 0) {
- SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
- }
- /*
- * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
- * IP/SCTP/first chunk header...
- */
- stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
- sh, ch, &inp, &net, vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (m->m_flags & M_FLOWID)) {
- net->flowid = m->m_pkthdr.flowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
-#endif
- }
- /* inp's ref-count increased && stcb locked */
- if (inp == NULL) {
- struct sctp_init_chunk *init_chk, chunk_buf;
-
- SCTP_STAT_INCR(sctps_noport);
-#ifdef ICMP_BANDLIM
- /*
- * we use the bandwidth limiting to protect against sending
- * too many ABORTS all at once. In this case these count the
- * same as an ICMP message.
- */
- if (badport_bandlim(0) < 0)
- goto bad;
-#endif /* ICMP_BANDLIM */
- SCTPDBG(SCTP_DEBUG_INPUT1,
- "Sending a ABORT from packet entry!\n");
- if (ch->chunk_type == SCTP_INITIATION) {
- /*
- * we do a trick here to get the INIT tag, dig in
- * and get the tag from the INIT and put it in the
- * common header.
- */
- init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
- iphlen + sizeof(*sh), sizeof(*init_chk),
- (uint8_t *) & chunk_buf);
- if (init_chk != NULL)
- sh->v_tag = init_chk->init.initiate_tag;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
- sctp_send_shutdown_complete2(m, sh, vrf_id, port);
- goto bad;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
- goto bad;
- }
- if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
- if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
- ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
- (ch->chunk_type != SCTP_INIT))) {
- sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id, port);
- }
- }
- goto bad;
- } else if (stcb == NULL) {
- refcount_up = 1;
- }
-#ifdef IPSEC
- /*
- * I very much doubt any of the IPSEC stuff will work but I have no
- * idea, so I will leave it in place.
- */
- if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
- MODULE_GLOBAL(ipsec4stat).in_polvio++;
- SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
- }
-#endif /* IPSEC */
-
- /*
- * common chunk processing
- */
- length = ip->ip_len + iphlen;
- offset -= sizeof(struct sctp_chunkhdr);
-
- ecn_bits = ip->ip_tos;
-
- /* sa_ignore NO_NULL_CHK */
- sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
- inp, stcb, net, ecn_bits, vrf_id, port);
- /* inp's ref-count reduced && stcb unlocked */
- if (m) {
- sctp_m_freem(m);
- }
- if ((inp) && (refcount_up)) {
- /* reduce ref-count */
- SCTP_INP_DECR_REF(inp);
- }
- return;
-bad:
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
- if ((inp) && (refcount_up)) {
- /* reduce ref-count */
- SCTP_INP_DECR_REF(inp);
- }
+ ecn_bits,
+ use_mflowid, mflowid,
+ vrf_id, port);
+out:
if (m) {
sctp_m_freem(m);
}
@@ -6078,15 +6123,14 @@
* No flow id built by lower layers fix it so we
* create one.
*/
- ip = mtod(m, struct ip *);
- offset = off + sizeof(*sh);
+ offset = off + sizeof(struct sctphdr);
if (SCTP_BUF_LEN(m) < offset) {
- if ((m = m_pullup(m, offset)) == 0) {
+ if ((m = m_pullup(m, offset)) == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
return;
}
- ip = mtod(m, struct ip *);
}
+ ip = mtod(m, struct ip *);
sh = (struct sctphdr *)((caddr_t)ip + off);
tag = htonl(sh->v_tag);
flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_input.h
--- a/head/sys/netinet/sctp_input.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_input.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,19 +30,23 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_input.h,v 1.6 2005/03/06 16:04:17 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_input.h 238003 2012-07-02 16:44:09Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_input.h 228653 2011-12-17 19:21:40Z tuexen $");
-
-#ifndef __sctp_input_h__
-#define __sctp_input_h__
+#ifndef _NETINET_SCTP_INPUT_H_
+#define _NETINET_SCTP_INPUT_H_
#if defined(_KERNEL) || defined(__Userspace__)
void
sctp_common_input_processing(struct mbuf **, int, int, int,
- struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb *,
- struct sctp_tcb *, struct sctp_nets *, uint8_t, uint32_t, uint16_t);
+ struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct sctp_chunkhdr *,
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint8_t,
+#endif
+ uint8_t,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
struct sctp_stream_reset_out_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_lock_bsd.h
--- a/head/sys/netinet/sctp_lock_bsd.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_lock_bsd.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,9 +1,7 @@
-#ifndef __sctp_lock_bsd_h__
-#define __sctp_lock_bsd_h__
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +30,12 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_lock_bsd.h 235828 2012-05-23 11:26:28Z tuexen $");
+
+#ifndef _NETINET_SCTP_LOCK_BSD_H_
+#define _NETINET_SCTP_LOCK_BSD_H_
+
/*
* General locking concepts: The goal of our locking is to of course provide
* consistency and yet minimize overhead. We will attempt to use
@@ -70,9 +74,6 @@
* SCTP_INP_INFO_RLOCK() and then when we want to add a new association to
* the SCTP_BASE_INFO() list's we will do a SCTP_INP_INFO_WLOCK().
*/
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
extern struct sctp_foo_stuff sctp_logoff[];
extern int sctp_logoff_stuff;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_os.h
--- a/head/sys/netinet/sctp_os.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_os.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -29,10 +29,12 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_os.h 228653 2011-12-17 19:21:40Z tuexen $");
-#ifndef __sctp_os_h__
-#define __sctp_os_h__
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_os.h 235828 2012-05-23 11:26:28Z tuexen $");
+
+#ifndef _NETINET_SCTP_OS_H_
+#define _NETINET_SCTP_OS_H_
/*
* General kernel memory allocation:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_os_bsd.h
--- a/head/sys/netinet/sctp_os_bsd.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_os_bsd.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -29,10 +29,12 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_os_bsd.h 231852 2012-02-17 02:39:58Z bz $");
-#ifndef __sctp_os_bsd_h__
-#define __sctp_os_bsd_h__
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_os_bsd.h 237715 2012-06-28 16:01:08Z tuexen $");
+
+#ifndef _NETINET_SCTP_OS_BSD_H_
+#define _NETINET_SCTP_OS_BSD_H_
/*
* includes
*/
@@ -157,12 +159,13 @@
*/
#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
+#define SCTP_PRINTF(params...) printf(params)
#if defined(SCTP_DEBUG)
#define SCTPDBG(level, params...) \
{ \
do { \
if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
- printf(params); \
+ SCTP_PRINTF(params); \
} \
} while (0); \
}
@@ -174,20 +177,10 @@
} \
} while (0); \
}
-#define SCTPDBG_PKT(level, iph, sh) \
-{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level) { \
- sctp_print_address_pkt(iph, sh); \
- } \
- } while (0); \
-}
#else
#define SCTPDBG(level, params...)
#define SCTPDBG_ADDR(level, addr)
-#define SCTPDBG_PKT(level, iph, sh)
#endif
-#define SCTP_PRINTF(params...) printf(params)
#ifdef SCTP_LTRACE_CHUNKS
#define SCTP_LTRACE_CHK(a, b, c, d) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_CHUNK_ENABLE) SCTP_CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d)
@@ -196,12 +189,14 @@
#endif
#ifdef SCTP_LTRACE_ERRORS
-#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
- m, inp, stcb, net, file, __LINE__, err);
-#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
- inp, stcb, net, file, __LINE__, err);
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
+ SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ m, inp, stcb, net, file, __LINE__, err);
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
+ SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ inp, stcb, net, file, __LINE__, err);
#else
#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err)
@@ -361,7 +356,7 @@
*/
#define SCTP_HEADER_TO_CHAIN(m) (m)
#define SCTP_DETACH_HEADER_FROM_CHAIN(m)
-#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len)
#define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
#define SCTP_RELEASE_HEADER(m)
#define SCTP_RELEASE_PKT(m) sctp_m_freem(m)
@@ -390,10 +385,6 @@
* its a NOP.
*/
-/* Macro's for getting length from V6/V4 header */
-#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
-#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
-
/* get the v6 hop limit */
#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_output.c
--- a/head/sys/netinet/sctp_output.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_output.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_output.c 234461 2012-04-19 13:11:17Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_output.c 238501 2012-07-15 20:16:17Z tuexen $");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
@@ -3062,7 +3060,7 @@
continue;
}
} else {
- printf("Stcb is null - no print\n");
+ SCTP_PRINTF("Stcb is null - no print\n");
}
atomic_add_int(&sifa->refcount, 1);
goto out;
@@ -3430,7 +3428,7 @@
}
m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
sndrcvinfo->sinfo_keynumber_valid = 1;
- sndrcvinfo->sinfo_keynumber = authinfo.auth_keyid;
+ sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber;
break;
default:
return (found);
@@ -3801,6 +3799,7 @@
}
}
+#if defined(INET) || defined(INET6)
static void
sctp_handle_no_route(struct sctp_tcb *stcb,
struct sctp_nets *net,
@@ -3815,8 +3814,7 @@
if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
- stcb,
- SCTP_FAILED_THRESHOLD,
+ stcb, 0,
(void *)net,
so_locked);
net->dest_state &= ~SCTP_ADDR_REACHABLE;
@@ -3846,6 +3844,8 @@
}
}
+#endif
+
static int
sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
struct sctp_tcb *stcb, /* may be NULL */
@@ -3862,13 +3862,13 @@
uint16_t dest_port,
uint32_t v_tag,
uint16_t port,
+ union sctp_sockstore *over_addr,
+ uint8_t use_mflowid, uint32_t mflowid,
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
- int so_locked SCTP_UNUSED,
+ int so_locked SCTP_UNUSED
#else
- int so_locked,
-#endif
- union sctp_sockstore *over_addr,
- struct mbuf *init
+ int so_locked
+#endif
)
/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
{
@@ -3885,17 +3885,21 @@
* interface and smallest_mtu size as well.
*/
/* Will need ifdefs around this */
- struct mbuf *o_pak;
struct mbuf *newm;
struct sctphdr *sctphdr;
int packet_length;
int ret;
uint32_t vrf_id;
+
+#if defined(INET) || defined(INET6)
+ struct mbuf *o_pak;
sctp_route_t *ro = NULL;
struct udphdr *udp = NULL;
+
+#endif
uint8_t tos_value;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so = NULL;
#endif
@@ -3954,8 +3958,8 @@
m->m_pkthdr.flowid = net->flowid;
m->m_flags |= M_FLOWID;
} else {
- if ((init != NULL) && (init->m_flags & M_FLOWID)) {
- m->m_pkthdr.flowid = init->m_pkthdr.flowid;
+ if (use_mflowid != 0) {
+ m->m_pkthdr.flowid = mflowid;
m->m_flags |= M_FLOWID;
}
}
@@ -4110,23 +4114,13 @@
sctp_m_freem(m);
return (ENOMEM);
}
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, packet_length);
-#endif
SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) &&
- (stcb) &&
- (stcb->asoc.loopback_scope))) {
- sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
- } else {
- SCTP_STAT_INCR(sctps_sendnocrc);
- }
+ sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
#endif
if (V_udp_cksum) {
SCTP_ENABLE_UDP_CSUM(o_pak);
@@ -4140,15 +4134,19 @@
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(o_pak);
+#endif
/* send it out. table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
so = SCTP_INP_SO(inp);
SCTP_SOCKET_UNLOCK(so, 0);
}
#endif
SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4165,10 +4163,7 @@
SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
if (net == NULL) {
/* free tempy routes */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- ro->ro_rt = NULL;
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
@@ -4252,8 +4247,8 @@
m->m_pkthdr.flowid = net->flowid;
m->m_flags |= M_FLOWID;
} else {
- if ((init != NULL) && (init->m_flags & M_FLOWID)) {
- m->m_pkthdr.flowid = init->m_pkthdr.flowid;
+ if (use_mflowid != 0) {
+ m->m_pkthdr.flowid = mflowid;
m->m_flags |= M_FLOWID;
}
}
@@ -4468,23 +4463,13 @@
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
return (ENOMEM);
}
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, packet_length);
-#endif
SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) &&
- (stcb) &&
- (stcb->asoc.loopback_scope))) {
- sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
- } else {
- SCTP_STAT_INCR(sctps_sendnocrc);
- }
+ sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
#endif
if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) {
udp->uh_sum = 0xffff;
@@ -4493,20 +4478,24 @@
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- m->m_pkthdr.csum_flags = CSUM_SCTP;
+ m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
m->m_pkthdr.csum_data = 0;
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
/* send it out. table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
so = SCTP_INP_SO(inp);
SCTP_SOCKET_UNLOCK(so, 0);
}
#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(o_pak);
+#endif
SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4528,9 +4517,7 @@
}
if (net == NULL) {
/* Now if we had a temp route free it */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
@@ -4856,7 +4843,9 @@
(struct sockaddr *)&net->ro._l_addr,
m, 0, NULL, 0, 0, 0, 0,
inp->sctp_lport, stcb->rport, htonl(0),
- net->port, so_locked, NULL, NULL);
+ net->port, NULL,
+ 0, 0,
+ so_locked);
SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
@@ -5202,7 +5191,7 @@
static int
sctp_are_there_new_addresses(struct sctp_association *asoc,
- struct mbuf *in_initpkt, int offset)
+ struct mbuf *in_initpkt, int offset, struct sockaddr *src)
{
/*
* Given a INIT packet, look through the packet to verify that there
@@ -5217,7 +5206,6 @@
uint16_t ptype, plen;
uint8_t fnd;
struct sctp_nets *net;
- struct ip *iph;
#ifdef INET
struct sockaddr_in sin4, *sa4;
@@ -5225,7 +5213,6 @@
#endif
#ifdef INET6
struct sockaddr_in6 sin6, *sa6;
- struct ip6_hdr *ip6h;
#endif
@@ -5239,37 +5226,18 @@
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
#endif
- sa_touse = NULL;
/* First what about the src address of the pkt ? */
- iph = mtod(in_initpkt, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- /* source addr is IPv4 */
- sin4.sin_addr = iph->ip_src;
- sa_touse = (struct sockaddr *)&sin4;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- /* source addr is IPv6 */
- ip6h = mtod(in_initpkt, struct ip6_hdr *);
- sin6.sin6_addr = ip6h->ip6_src;
- sa_touse = (struct sockaddr *)&sin6;
- break;
-#endif
- default:
- return (1);
- }
-
fnd = 0;
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
sa = (struct sockaddr *)&net->ro._l_addr;
- if (sa->sa_family == sa_touse->sa_family) {
+ if (sa->sa_family == src->sa_family) {
#ifdef INET
if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *src4;
+
sa4 = (struct sockaddr_in *)sa;
- if (sa4->sin_addr.s_addr == sin4.sin_addr.s_addr) {
+ src4 = (struct sockaddr_in *)src;
+ if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
fnd = 1;
break;
}
@@ -5277,8 +5245,11 @@
#endif
#ifdef INET6
if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src6;
+
sa6 = (struct sockaddr_in6 *)sa;
- if (SCTP6_ARE_ADDR_EQUAL(sa6, &sin6)) {
+ src6 = (struct sockaddr_in6 *)src;
+ if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
fnd = 1;
break;
}
@@ -5385,8 +5356,11 @@
*/
void
sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *init_pkt, int iphlen, int offset, struct sctphdr *sh,
- struct sctp_init_chunk *init_chk, uint32_t vrf_id, uint16_t port, int hold_inp_lock)
+ struct mbuf *init_pkt, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_init_chunk *init_chk,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port, int hold_inp_lock)
{
struct sctp_association *asoc;
struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
@@ -5395,20 +5369,18 @@
struct sctp_ecn_supported_param *ecn;
struct sctp_prsctp_supported_param *prsctp;
struct sctp_supported_chunk_types_param *pr_supported;
- union sctp_sockstore store, store1, *over_addr;
+ union sctp_sockstore *over_addr;
#ifdef INET
- struct sockaddr_in *sin, *to_sin;
+ struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
+ struct sockaddr_in *src4 = (struct sockaddr_in *)src;
+ struct sockaddr_in *sin;
#endif
#ifdef INET6
- struct sockaddr_in6 *sin6, *to_sin6;
-
-#endif
- struct ip *iph;
-
-#ifdef INET6
- struct ip6_hdr *ip6;
+ struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *sin6;
#endif
struct sockaddr *to;
@@ -5423,21 +5395,24 @@
int nat_friendly = 0;
struct socket *so;
- if (stcb)
+ if (stcb) {
asoc = &stcb->asoc;
- else
+ } else {
asoc = NULL;
+ }
mp_last = NULL;
if ((asoc != NULL) &&
(SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (sctp_are_there_new_addresses(asoc, init_pkt, offset))) {
+ (sctp_are_there_new_addresses(asoc, init_pkt, offset, src))) {
/* new addresses, out of here in non-cookie-wait states */
/*
* Send a ABORT, we don't add the new address error clause
* though we even set the T bit and copy in the 0 tag.. this
* looks no different than if no listener was present.
*/
- sctp_send_abort(init_pkt, iphlen, sh, 0, NULL, vrf_id, port);
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
return;
}
abort_flag = 0;
@@ -5446,8 +5421,10 @@
&abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
if (abort_flag) {
do_a_abort:
- sctp_send_abort(init_pkt, iphlen, sh,
- init_chk->init.initiate_tag, op_err, vrf_id, port);
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh,
+ init_chk->init.initiate_tag, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
return;
}
m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
@@ -5509,61 +5486,20 @@
#else
stc.ipv4_scope = 0;
#endif
- /* now for scope setup */
- memset((caddr_t)&store, 0, sizeof(store));
- memset((caddr_t)&store1, 0, sizeof(store1));
+ if (net == NULL) {
+ to = src;
+ switch (dst->sa_family) {
#ifdef INET
- sin = &store.sin;
- to_sin = &store1.sin;
-#endif
-#ifdef INET6
- sin6 = &store.sin6;
- to_sin6 = &store1.sin6;
-#endif
- iph = mtod(init_pkt, struct ip *);
- /* establish the to_addr's */
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- to_sin->sin_port = sh->dest_port;
- to_sin->sin_family = AF_INET;
- to_sin->sin_len = sizeof(struct sockaddr_in);
- to_sin->sin_addr = iph->ip_dst;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = mtod(init_pkt, struct ip6_hdr *);
- to_sin6->sin6_addr = ip6->ip6_dst;
- to_sin6->sin6_scope_id = 0;
- to_sin6->sin6_port = sh->dest_port;
- to_sin6->sin6_family = AF_INET6;
- to_sin6->sin6_len = sizeof(struct sockaddr_in6);
- break;
-#endif
- default:
- goto do_a_abort;
- break;
- }
-
- if (net == NULL) {
- to = (struct sockaddr *)&store;
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
+ case AF_INET:
{
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_port = sh->src_port;
- sin->sin_addr = iph->ip_src;
/* lookup address */
- stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[0] = src4->sin_addr.s_addr;
stc.address[1] = 0;
stc.address[2] = 0;
stc.address[3] = 0;
stc.addr_type = SCTP_IPV4_ADDRESS;
/* local from address */
- stc.laddress[0] = to_sin->sin_addr.s_addr;
+ stc.laddress[0] = dst4->sin_addr.s_addr;
stc.laddress[1] = 0;
stc.laddress[2] = 0;
stc.laddress[3] = 0;
@@ -5571,14 +5507,14 @@
/* scope_id is only for v6 */
stc.scope_id = 0;
#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
- if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ if (IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) {
stc.ipv4_scope = 1;
}
#else
stc.ipv4_scope = 1;
#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
/* Must use the address in this case */
- if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+ if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
stc.ipv4_scope = 1;
stc.site_scope = 1;
@@ -5588,32 +5524,17 @@
}
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
+ case AF_INET6:
{
- ip6 = mtod(init_pkt, struct ip6_hdr *);
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_port = sh->src_port;
- sin6->sin6_addr = ip6->ip6_src;
- /* lookup address */
- memcpy(&stc.address, &sin6->sin6_addr,
- sizeof(struct in6_addr));
- sin6->sin6_scope_id = 0;
stc.addr_type = SCTP_IPV6_ADDRESS;
- stc.scope_id = 0;
- if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
- /*
- * FIX ME: does this have scope from
- * rcvif?
- */
- (void)sa6_recoverscope(sin6);
- stc.scope_id = sin6->sin6_scope_id;
- sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
+ stc.scope_id = in6_getscope(&src6->sin6_addr);
+ if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
stc.local_scope = 0;
stc.site_scope = 1;
stc.ipv4_scope = 1;
- } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr)) {
/*
* If the new destination is a
* LINK_LOCAL we must have common
@@ -5638,14 +5559,7 @@
* pull out the scope_id from
* incoming pkt
*/
- /*
- * FIX ME: does this have scope from
- * rcvif?
- */
- (void)sa6_recoverscope(sin6);
- stc.scope_id = sin6->sin6_scope_id;
- sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
- } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr)) {
/*
* If the new destination is
* SITE_LOCAL then we must have site
@@ -5653,7 +5567,7 @@
*/
stc.site_scope = 1;
}
- memcpy(&stc.laddress, &to_sin6->sin6_addr, sizeof(struct in6_addr));
+ memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr));
stc.laddr_type = SCTP_IPV6_ADDRESS;
break;
}
@@ -5733,7 +5647,7 @@
if (net->src_addr_selected == 0) {
/*
* strange case here, the INIT should have
- * did the selection.
+ * done the selection.
*/
net->ro._s_addr = sctp_source_address_selection(inp,
stcb, (sctp_route_t *) & net->ro,
@@ -6041,7 +5955,7 @@
}
}
if (stc.loopback_scope) {
- over_addr = &store1;
+ over_addr = (union sctp_sockstore *)dst;
} else {
over_addr = NULL;
}
@@ -6049,7 +5963,9 @@
(void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
0, 0,
inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
- port, SCTP_SO_NOT_LOCKED, over_addr, init_pkt);
+ port, over_addr,
+ use_mflowid, mflowid,
+ SCTP_SO_NOT_LOCKED);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
}
@@ -6092,14 +6008,14 @@
* if the mbuf is here
*/
int ret_spc;
- int cause;
+ uint8_t sent;
if (chk->sent > SCTP_DATAGRAM_UNSENT)
- cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT;
+ sent = 1;
else
- cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT;
+ sent = 0;
ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
- cause,
+ sent,
SCTP_SO_LOCKED);
freed_spc += ret_spc;
if (freed_spc >= dataout) {
@@ -6122,8 +6038,7 @@
int ret_spc;
ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
- SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT,
- SCTP_SO_LOCKED);
+ 0, SCTP_SO_LOCKED);
freed_spc += ret_spc;
if (freed_spc >= dataout) {
@@ -6573,9 +6488,7 @@
* dis-appearing on us.
*/
atomic_add_int(&stcb->asoc.refcnt, 1);
- sctp_abort_an_association(inp, stcb,
- SCTP_RESPONSE_TO_USER_REQ,
- m, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
/*
* sctp_abort_an_association calls sctp_free_asoc()
* free association will NOT free it since we
@@ -6669,7 +6582,6 @@
abort_anyway:
atomic_add_int(&stcb->asoc.refcnt, 1);
sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_RESPONSE_TO_USER_REQ,
NULL, SCTP_SO_NOT_LOCKED);
atomic_add_int(&stcb->asoc.refcnt, -1);
goto no_chunk_output;
@@ -8135,7 +8047,9 @@
no_fragmentflg, 0, asconf,
inp->sctp_lport, stcb->rport,
htonl(stcb->asoc.peer_vtag),
- net->port, so_locked, NULL, NULL))) {
+ net->port, NULL,
+ 0, 0,
+ so_locked))) {
if (error == ENOBUFS) {
asoc->ifp_had_enobuf = 1;
SCTP_STAT_INCR(sctps_lowlevelerr);
@@ -8407,7 +8321,9 @@
no_fragmentflg, 0, asconf,
inp->sctp_lport, stcb->rport,
htonl(stcb->asoc.peer_vtag),
- net->port, so_locked, NULL, NULL))) {
+ net->port, NULL,
+ 0, 0,
+ so_locked))) {
if (error == ENOBUFS) {
asoc->ifp_had_enobuf = 1;
SCTP_STAT_INCR(sctps_lowlevelerr);
@@ -8523,12 +8439,14 @@
}
/* now lets add any data within the MTU constraints */
switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+#ifdef INET
case AF_INET:
if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
else
omtu = 0;
break;
+#endif
#ifdef INET6
case AF_INET6:
if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
@@ -8746,7 +8664,9 @@
asconf,
inp->sctp_lport, stcb->rport,
htonl(stcb->asoc.peer_vtag),
- net->port, so_locked, NULL, NULL))) {
+ net->port, NULL,
+ 0, 0,
+ so_locked))) {
/* error, we could not output */
if (error == ENOBUFS) {
SCTP_STAT_INCR(sctps_lowlevelerr);
@@ -9448,7 +9368,9 @@
auth_offset, auth, stcb->asoc.authinfo.active_keyid,
no_fragmentflg, 0, 0,
inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
- chk->whoTo->port, so_locked, NULL, NULL))) {
+ chk->whoTo->port, NULL,
+ 0, 0,
+ so_locked))) {
SCTP_STAT_INCR(sctps_lowlevelerr);
return (error);
}
@@ -9493,7 +9415,7 @@
continue;
}
if (chk->data == NULL) {
- printf("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n",
+ SCTP_PRINTF("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n",
chk->rec.data.TSN_seq, chk->snd_count, chk->sent);
continue;
}
@@ -9504,7 +9426,7 @@
chk->snd_count,
SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
atomic_add_int(&stcb->asoc.refcnt, 1);
- sctp_abort_an_association(stcb->sctp_ep, stcb, 0, NULL, so_locked);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked);
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
return (SCTP_RETRAN_EXIT);
@@ -9713,7 +9635,9 @@
auth_offset, auth, auth_keyid,
no_fragmentflg, 0, 0,
inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
- net->port, so_locked, NULL, NULL))) {
+ net->port, NULL,
+ 0, 0,
+ so_locked))) {
/* error, we could not output */
SCTP_STAT_INCR(sctps_lowlevelerr);
return (error);
@@ -10831,7 +10755,9 @@
(struct sockaddr *)&net->ro._l_addr,
m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
- stcb->asoc.primary_destination->port, so_locked, NULL, NULL);
+ stcb->asoc.primary_destination->port, NULL,
+ 0, 0,
+ so_locked);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
}
@@ -10868,183 +10794,213 @@
m_shutdown_comp, 0, NULL, 0, 1, 0, 0,
stcb->sctp_ep->sctp_lport, stcb->rport,
htonl(vtag),
- net->port, SCTP_SO_NOT_LOCKED, NULL, NULL);
+ net->port, NULL,
+ 0, 0,
+ SCTP_SO_NOT_LOCKED);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
return;
}
-void
-sctp_send_shutdown_complete2(struct mbuf *m, struct sctphdr *sh,
+static void
+sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag,
+ uint8_t type, struct mbuf *cause,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
- /* formulate and SEND a SHUTDOWN-COMPLETE */
struct mbuf *o_pak;
struct mbuf *mout;
- struct ip *iph;
- struct udphdr *udp = NULL;
- int offset_out, len, mlen;
- struct sctp_shutdown_complete_msg *comp_cp;
+ struct sctphdr *shout;
+ struct sctp_chunkhdr *ch;
+ struct udphdr *udp;
+ int len, cause_len, padding_len, ret;
#ifdef INET
- struct ip *iph_out;
+ struct sockaddr_in *src_sin, *dst_sin;
+ struct ip *ip;
#endif
#ifdef INET6
- struct ip6_hdr *ip6, *ip6_out;
-
-#endif
-
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
+ struct sockaddr_in6 *src_sin6, *dst_sin6;
+ struct ip6_hdr *ip6;
+
+#endif
+
+ /* Compute the length of the cause and add final padding. */
+ cause_len = 0;
+ if (cause != NULL) {
+ struct mbuf *m_at, *m_last = NULL;
+
+ for (m_at = cause; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ if (SCTP_BUF_NEXT(m_at) == NULL)
+ m_last = m_at;
+ cause_len += SCTP_BUF_LEN(m_at);
+ }
+ padding_len = cause_len % 4;
+ if (padding_len != 0) {
+ padding_len = 4 - padding_len;
+ }
+ if (padding_len != 0) {
+ if (sctp_add_pad_tombuf(m_last, padding_len)) {
+ sctp_m_freem(cause);
+ return;
+ }
+ }
+ } else {
+ padding_len = 0;
+ }
+ /* Get an mbuf for the header. */
+ len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ switch (dst->sa_family) {
#ifdef INET
- case IPVERSION:
- len = (sizeof(struct ip) + sizeof(struct sctp_shutdown_complete_msg));
+ case AF_INET:
+ len += sizeof(struct ip);
break;
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
- len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_shutdown_complete_msg));
+ case AF_INET6:
+ len += sizeof(struct ip6_hdr);
break;
#endif
default:
- return;
+ break;
}
if (port) {
len += sizeof(struct udphdr);
}
mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
if (mout == NULL) {
+ if (cause) {
+ sctp_m_freem(cause);
+ }
return;
}
SCTP_BUF_RESV_UF(mout, max_linkhdr);
SCTP_BUF_LEN(mout) = len;
- SCTP_BUF_NEXT(mout) = NULL;
- if (m->m_flags & M_FLOWID) {
- mout->m_pkthdr.flowid = m->m_pkthdr.flowid;
+ SCTP_BUF_NEXT(mout) = cause;
+ if (use_mflowid != 0) {
+ mout->m_pkthdr.flowid = mflowid;
mout->m_flags |= M_FLOWID;
}
#ifdef INET
- iph_out = NULL;
+ ip = NULL;
#endif
#ifdef INET6
- ip6_out = NULL;
-#endif
- offset_out = 0;
-
- switch (iph->ip_v) {
+ ip6 = NULL;
+#endif
+ switch (dst->sa_family) {
#ifdef INET
- case IPVERSION:
- iph_out = mtod(mout, struct ip *);
-
- /* Fill in the IP header for the ABORT */
- iph_out->ip_v = IPVERSION;
- iph_out->ip_hl = (sizeof(struct ip) / 4);
- iph_out->ip_tos = (u_char)0;
- iph_out->ip_id = 0;
- iph_out->ip_off = 0;
- iph_out->ip_ttl = MAXTTL;
+ case AF_INET:
+ src_sin = (struct sockaddr_in *)src;
+ dst_sin = (struct sockaddr_in *)dst;
+ ip = mtod(mout, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+ ip->ip_tos = 0;
+ ip->ip_id = ip_newid();
+ ip->ip_off = 0;
+ ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
if (port) {
- iph_out->ip_p = IPPROTO_UDP;
+ ip->ip_p = IPPROTO_UDP;
} else {
- iph_out->ip_p = IPPROTO_SCTP;
- }
- iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
- iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
-
- /* let IP layer calculate this */
- iph_out->ip_sum = 0;
- offset_out += sizeof(*iph_out);
- comp_cp = (struct sctp_shutdown_complete_msg *)(
- (caddr_t)iph_out + offset_out);
+ ip->ip_p = IPPROTO_SCTP;
+ }
+ ip->ip_src.s_addr = dst_sin->sin_addr.s_addr;
+ ip->ip_dst.s_addr = src_sin->sin_addr.s_addr;
+ ip->ip_sum = 0;
+ len = sizeof(struct ip);
+ shout = (struct sctphdr *)((caddr_t)ip + len);
break;
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = (struct ip6_hdr *)iph;
- ip6_out = mtod(mout, struct ip6_hdr *);
-
- /* Fill in the IPv6 header for the ABORT */
- ip6_out->ip6_flow = ip6->ip6_flow;
- ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+ case AF_INET6:
+ src_sin6 = (struct sockaddr_in6 *)src;
+ dst_sin6 = (struct sockaddr_in6 *)dst;
+ ip6 = mtod(mout, struct ip6_hdr *);
+ ip6->ip6_flow = htonl(0x60000000);
+ if (V_ip6_auto_flowlabel) {
+ ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+ }
+ ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
if (port) {
- ip6_out->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_nxt = IPPROTO_UDP;
} else {
- ip6_out->ip6_nxt = IPPROTO_SCTP;
- }
- ip6_out->ip6_src = ip6->ip6_dst;
- ip6_out->ip6_dst = ip6->ip6_src;
- /*
- * ?? The old code had both the iph len + payload, I think
- * this is wrong and would never have worked
- */
- ip6_out->ip6_plen = sizeof(struct sctp_shutdown_complete_msg);
- offset_out += sizeof(*ip6_out);
- comp_cp = (struct sctp_shutdown_complete_msg *)(
- (caddr_t)ip6_out + offset_out);
+ ip6->ip6_nxt = IPPROTO_SCTP;
+ }
+ ip6->ip6_src = dst_sin6->sin6_addr;
+ ip6->ip6_dst = src_sin6->sin6_addr;
+ len = sizeof(struct ip6_hdr);
+ shout = (struct sctphdr *)((caddr_t)ip6 + len);
break;
-#endif /* INET6 */
+#endif
default:
- /* Currently not supported. */
- sctp_m_freem(mout);
- return;
+ len = 0;
+ shout = mtod(mout, struct sctphdr *);
+ break;
}
if (port) {
if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
sctp_m_freem(mout);
return;
}
- udp = (struct udphdr *)comp_cp;
+ udp = (struct udphdr *)shout;
udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
udp->uh_dport = port;
- udp->uh_ulen = htons(sizeof(struct sctp_shutdown_complete_msg) + sizeof(struct udphdr));
+ udp->uh_sum = 0;
+ udp->uh_ulen = htons(sizeof(struct udphdr) +
+ sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr) +
+ cause_len + padding_len);
+ len += sizeof(struct udphdr);
+ shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr));
+ } else {
+ udp = NULL;
+ }
+ shout->src_port = sh->dest_port;
+ shout->dest_port = sh->src_port;
+ shout->checksum = 0;
+ if (vtag) {
+ shout->v_tag = htonl(vtag);
+ } else {
+ shout->v_tag = sh->v_tag;
+ }
+ len += sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)((caddr_t)shout + sizeof(struct sctphdr));
+ ch->chunk_type = type;
+ if (vtag) {
+ ch->chunk_flags = 0;
+ } else {
+ ch->chunk_flags = SCTP_HAD_NO_TCB;
+ }
+ ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len);
+ len += sizeof(struct sctp_chunkhdr);
+ len += cause_len + padding_len;
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ sctp_m_freem(mout);
+ return;
+ }
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ switch (dst->sa_family) {
#ifdef INET
- if (iph_out) {
+ case AF_INET:
+ if (port) {
if (V_udp_cksum) {
- udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
} else {
udp->uh_sum = 0;
}
}
-#endif
- offset_out += sizeof(struct udphdr);
- comp_cp = (struct sctp_shutdown_complete_msg *)((caddr_t)comp_cp + sizeof(struct udphdr));
- }
- if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
- /* no mbuf's */
- sctp_m_freem(mout);
- return;
- }
- /* Now copy in and fill in the ABORT tags etc. */
- comp_cp->sh.src_port = sh->dest_port;
- comp_cp->sh.dest_port = sh->src_port;
- comp_cp->sh.checksum = 0;
- comp_cp->sh.v_tag = sh->v_tag;
- comp_cp->shut_cmp.ch.chunk_flags = SCTP_HAD_NO_TCB;
- comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
- comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
-
-#ifdef INET
- if (iph_out != NULL) {
- sctp_route_t ro;
- int ret;
-
- mlen = SCTP_BUF_LEN(mout);
- bzero(&ro, sizeof ro);
- /* set IPv4 length */
- iph_out->ip_len = mlen;
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, mlen);
-#endif
+ ip->ip_len = len;
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- comp_cp->sh.checksum = sctp_calculate_cksum(mout, offset_out);
+ shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip) + sizeof(struct udphdr));
SCTP_STAT_INCR(sctps_sendswcrc);
#endif
if (V_udp_cksum) {
- SCTP_ENABLE_UDP_CSUM(mout);
+ SCTP_ENABLE_UDP_CSUM(o_pak);
}
} else {
#if defined(SCTP_WITH_NO_CSUM)
@@ -11055,59 +11011,66 @@
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
- SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
- /* out it goes */
- SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(o_pak);
+ }
+#endif
+ SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
+ break;
#endif
#ifdef INET6
- if (ip6_out != NULL) {
- struct route_in6 ro;
- int ret;
- struct ifnet *ifp = NULL;
-
- bzero(&ro, sizeof(ro));
- mlen = SCTP_BUF_LEN(mout);
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, mlen);
-#endif
- SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+ case AF_INET6:
+ ip6->ip6_plen = len - sizeof(struct ip6_hdr);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- comp_cp->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
SCTP_STAT_INCR(sctps_sendswcrc);
#endif
- if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), mlen - sizeof(struct ip6_hdr))) == 0) {
+ if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
udp->uh_sum = 0xffff;
}
} else {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
mout->m_pkthdr.csum_data = 0;
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
- SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
-#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(o_pak);
+ }
+#endif
+ SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
+ break;
+#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+ dst->sa_family);
+ sctp_m_freem(mout);
+ SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return;
+ }
SCTP_STAT_INCR(sctps_sendpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
return;
-
+}
+
+void
+sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
+{
+ sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
}
void
@@ -11278,21 +11241,14 @@
void
sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
- struct mbuf *m, int iphlen, int bad_crc)
+ struct mbuf *m, int len, int iphlen, int bad_crc)
{
struct sctp_association *asoc;
struct sctp_pktdrop_chunk *drp;
struct sctp_tmit_chunk *chk;
uint8_t *datap;
- int len;
int was_trunc = 0;
- struct ip *iph;
-
-#ifdef INET6
- struct ip6_hdr *ip6h;
-
-#endif
- int fullsz = 0, extra = 0;
+ int fullsz = 0;
long spc;
int offset;
struct sctp_chunkhdr *ch, chunk_buf;
@@ -11317,28 +11273,8 @@
return;
}
chk->copy_by_ref = 0;
- iph = mtod(m, struct ip *);
- if (iph == NULL) {
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- return;
- }
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- /* IPv4 */
- len = chk->send_size = iph->ip_len;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- /* IPv6 */
- ip6h = mtod(m, struct ip6_hdr *);
- len = chk->send_size = htons(ip6h->ip6_plen);
- break;
-#endif
- default:
- return;
- }
+ len -= iphlen;
+ chk->send_size = len;
/* Validate that we do not have an ABORT in here. */
offset = iphlen + sizeof(struct sctphdr);
ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
@@ -11374,7 +11310,7 @@
/*
* only send 1 mtu worth, trim off the excess on the end.
*/
- fullsz = len - extra;
+ fullsz = len;
len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
was_trunc = 1;
}
@@ -11938,545 +11874,33 @@
}
void
-sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
- struct mbuf *err_cause, uint32_t vrf_id, uint16_t port)
-{
- /*-
- * Formulate the abort message, and send it back down.
- */
- struct mbuf *o_pak;
- struct mbuf *mout;
- struct sctp_abort_msg *abm;
- struct ip *iph;
- struct udphdr *udp;
- int iphlen_out, len;
-
-#ifdef INET
- struct ip *iph_out;
-
-#endif
-#ifdef INET6
- struct ip6_hdr *ip6, *ip6_out;
-
-#endif
-
- /* don't respond to ABORT with ABORT */
+sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
+{
+ /* Don't respond to an ABORT with an ABORT. */
if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
- if (err_cause)
- sctp_m_freem(err_cause);
+ if (cause)
+ sctp_m_freem(cause);
return;
}
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- len = (sizeof(struct ip) + sizeof(struct sctp_abort_msg));
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_abort_msg));
- break;
-#endif
- default:
- if (err_cause) {
- sctp_m_freem(err_cause);
- }
- return;
- }
- if (port) {
- len += sizeof(struct udphdr);
- }
- mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
- if (mout == NULL) {
- if (err_cause) {
- sctp_m_freem(err_cause);
- }
- return;
- }
- SCTP_BUF_RESV_UF(mout, max_linkhdr);
- SCTP_BUF_LEN(mout) = len;
- SCTP_BUF_NEXT(mout) = err_cause;
- if (m->m_flags & M_FLOWID) {
- mout->m_pkthdr.flowid = m->m_pkthdr.flowid;
- mout->m_flags |= M_FLOWID;
- }
-#ifdef INET
- iph_out = NULL;
-#endif
-#ifdef INET6
- ip6_out = NULL;
-#endif
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- iph_out = mtod(mout, struct ip *);
-
- /* Fill in the IP header for the ABORT */
- iph_out->ip_v = IPVERSION;
- iph_out->ip_hl = (sizeof(struct ip) / 4);
- iph_out->ip_tos = (u_char)0;
- iph_out->ip_id = 0;
- iph_out->ip_off = 0;
- iph_out->ip_ttl = MAXTTL;
- if (port) {
- iph_out->ip_p = IPPROTO_UDP;
- } else {
- iph_out->ip_p = IPPROTO_SCTP;
- }
- iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
- iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
- /* let IP layer calculate this */
- iph_out->ip_sum = 0;
-
- iphlen_out = sizeof(*iph_out);
- abm = (struct sctp_abort_msg *)((caddr_t)iph_out + iphlen_out);
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = (struct ip6_hdr *)iph;
- ip6_out = mtod(mout, struct ip6_hdr *);
-
- /* Fill in the IP6 header for the ABORT */
- ip6_out->ip6_flow = ip6->ip6_flow;
- ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
- if (port) {
- ip6_out->ip6_nxt = IPPROTO_UDP;
- } else {
- ip6_out->ip6_nxt = IPPROTO_SCTP;
- }
- ip6_out->ip6_src = ip6->ip6_dst;
- ip6_out->ip6_dst = ip6->ip6_src;
-
- iphlen_out = sizeof(*ip6_out);
- abm = (struct sctp_abort_msg *)((caddr_t)ip6_out + iphlen_out);
- break;
-#endif /* INET6 */
- default:
- /* Currently not supported */
- sctp_m_freem(mout);
- return;
- }
-
- udp = (struct udphdr *)abm;
- if (port) {
- if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
- sctp_m_freem(mout);
- return;
- }
- udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
- udp->uh_dport = port;
- /* set udp->uh_ulen later */
- udp->uh_sum = 0;
- iphlen_out += sizeof(struct udphdr);
- abm = (struct sctp_abort_msg *)((caddr_t)abm + sizeof(struct udphdr));
- }
- abm->sh.src_port = sh->dest_port;
- abm->sh.dest_port = sh->src_port;
- abm->sh.checksum = 0;
- if (vtag == 0) {
- abm->sh.v_tag = sh->v_tag;
- abm->msg.ch.chunk_flags = SCTP_HAD_NO_TCB;
- } else {
- abm->sh.v_tag = htonl(vtag);
- abm->msg.ch.chunk_flags = 0;
- }
- abm->msg.ch.chunk_type = SCTP_ABORT_ASSOCIATION;
-
- if (err_cause) {
- struct mbuf *m_tmp = err_cause;
- int err_len = 0;
-
- /* get length of the err_cause chain */
- while (m_tmp != NULL) {
- err_len += SCTP_BUF_LEN(m_tmp);
- m_tmp = SCTP_BUF_NEXT(m_tmp);
- }
- len = SCTP_BUF_LEN(mout) + err_len;
- if (err_len % 4) {
- /* need pad at end of chunk */
- uint32_t cpthis = 0;
- int padlen;
-
- padlen = 4 - (len % 4);
- m_copyback(mout, len, padlen, (caddr_t)&cpthis);
- len += padlen;
- }
- abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch) + err_len);
- } else {
- len = SCTP_BUF_LEN(mout);
- abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch));
- }
-
- if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
- /* no mbuf's */
- sctp_m_freem(mout);
- return;
- }
-#ifdef INET
- if (iph_out != NULL) {
- sctp_route_t ro;
- int ret;
-
- /* zap the stack pointer to the route */
- bzero(&ro, sizeof ro);
- if (port) {
- udp->uh_ulen = htons(len - sizeof(struct ip));
- if (V_udp_cksum) {
- udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
- } else {
- udp->uh_sum = 0;
- }
- }
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip_output:\n");
- SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, iph_out, &abm->sh);
- /* set IPv4 length */
- iph_out->ip_len = len;
- /* out it goes */
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, len);
-#endif
- SCTP_ATTACH_CHAIN(o_pak, mout, len);
- if (port) {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- abm->sh.checksum = sctp_calculate_cksum(mout, iphlen_out);
- SCTP_STAT_INCR(sctps_sendswcrc);
-#endif
- if (V_udp_cksum) {
- SCTP_ENABLE_UDP_CSUM(o_pak);
- }
- } else {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- mout->m_pkthdr.csum_flags = CSUM_SCTP;
- mout->m_pkthdr.csum_data = 0;
- SCTP_STAT_INCR(sctps_sendhwcrc);
-#endif
- }
- SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
-#endif
-#ifdef INET6
- if (ip6_out != NULL) {
- struct route_in6 ro;
- int ret;
- struct ifnet *ifp = NULL;
-
- /* zap the stack pointer to the route */
- bzero(&ro, sizeof(ro));
- if (port) {
- udp->uh_ulen = htons(len - sizeof(struct ip6_hdr));
- }
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip6_output:\n");
- SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, (struct ip *)ip6_out, &abm->sh);
- ip6_out->ip6_plen = len - sizeof(*ip6_out);
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, len);
-#endif
- SCTP_ATTACH_CHAIN(o_pak, mout, len);
- if (port) {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- abm->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
-#endif
- if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
- udp->uh_sum = 0xffff;
- }
- } else {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- mout->m_pkthdr.csum_flags = CSUM_SCTP;
- mout->m_pkthdr.csum_data = 0;
- SCTP_STAT_INCR(sctps_sendhwcrc);
-#endif
- }
- SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
-#endif
- SCTP_STAT_INCR(sctps_sendpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
+ use_mflowid, mflowid,
+ vrf_id, port);
+ return;
}
void
-sctp_send_operr_to(struct mbuf *m, int iphlen, struct mbuf *scm, uint32_t vtag,
+sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
- struct mbuf *o_pak;
- struct sctphdr *sh, *sh_out;
- struct sctp_chunkhdr *ch;
- struct ip *iph;
- struct udphdr *udp = NULL;
- struct mbuf *mout;
- int iphlen_out, len;
-
-#ifdef INET
- struct ip *iph_out;
-
-#endif
-#ifdef INET6
- struct ip6_hdr *ip6, *ip6_out;
-
-#endif
-
- iph = mtod(m, struct ip *);
- sh = (struct sctphdr *)((caddr_t)iph + iphlen);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- len = (sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr));
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- len = (sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr));
- break;
-#endif
- default:
- if (scm) {
- sctp_m_freem(scm);
- }
- return;
- }
- if (port) {
- len += sizeof(struct udphdr);
- }
- mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
- if (mout == NULL) {
- if (scm) {
- sctp_m_freem(scm);
- }
- return;
- }
- SCTP_BUF_RESV_UF(mout, max_linkhdr);
- SCTP_BUF_LEN(mout) = len;
- SCTP_BUF_NEXT(mout) = scm;
- if (m->m_flags & M_FLOWID) {
- mout->m_pkthdr.flowid = m->m_pkthdr.flowid;
- mout->m_flags |= M_FLOWID;
- }
-#ifdef INET
- iph_out = NULL;
-#endif
-#ifdef INET6
- ip6_out = NULL;
-#endif
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- iph_out = mtod(mout, struct ip *);
-
- /* Fill in the IP header for the ABORT */
- iph_out->ip_v = IPVERSION;
- iph_out->ip_hl = (sizeof(struct ip) / 4);
- iph_out->ip_tos = (u_char)0;
- iph_out->ip_id = 0;
- iph_out->ip_off = 0;
- iph_out->ip_ttl = MAXTTL;
- if (port) {
- iph_out->ip_p = IPPROTO_UDP;
- } else {
- iph_out->ip_p = IPPROTO_SCTP;
- }
- iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
- iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
- /* let IP layer calculate this */
- iph_out->ip_sum = 0;
-
- iphlen_out = sizeof(struct ip);
- sh_out = (struct sctphdr *)((caddr_t)iph_out + iphlen_out);
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = (struct ip6_hdr *)iph;
- ip6_out = mtod(mout, struct ip6_hdr *);
-
- /* Fill in the IP6 header for the ABORT */
- ip6_out->ip6_flow = ip6->ip6_flow;
- ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
- if (port) {
- ip6_out->ip6_nxt = IPPROTO_UDP;
- } else {
- ip6_out->ip6_nxt = IPPROTO_SCTP;
- }
- ip6_out->ip6_src = ip6->ip6_dst;
- ip6_out->ip6_dst = ip6->ip6_src;
-
- iphlen_out = sizeof(struct ip6_hdr);
- sh_out = (struct sctphdr *)((caddr_t)ip6_out + iphlen_out);
- break;
-#endif /* INET6 */
- default:
- /* Currently not supported */
- sctp_m_freem(mout);
- return;
- }
-
- udp = (struct udphdr *)sh_out;
- if (port) {
- if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
- sctp_m_freem(mout);
- return;
- }
- udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
- udp->uh_dport = port;
- /* set udp->uh_ulen later */
- udp->uh_sum = 0;
- iphlen_out += sizeof(struct udphdr);
- sh_out = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
- }
- sh_out->src_port = sh->dest_port;
- sh_out->dest_port = sh->src_port;
- sh_out->v_tag = vtag;
- sh_out->checksum = 0;
-
- ch = (struct sctp_chunkhdr *)((caddr_t)sh_out + sizeof(struct sctphdr));
- ch->chunk_type = SCTP_OPERATION_ERROR;
- ch->chunk_flags = 0;
-
- if (scm) {
- struct mbuf *m_tmp = scm;
- int cause_len = 0;
-
- /* get length of the err_cause chain */
- while (m_tmp != NULL) {
- cause_len += SCTP_BUF_LEN(m_tmp);
- m_tmp = SCTP_BUF_NEXT(m_tmp);
- }
- len = SCTP_BUF_LEN(mout) + cause_len;
- if (cause_len % 4) {
- /* need pad at end of chunk */
- uint32_t cpthis = 0;
- int padlen;
-
- padlen = 4 - (len % 4);
- m_copyback(mout, len, padlen, (caddr_t)&cpthis);
- len += padlen;
- }
- ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len);
- } else {
- len = SCTP_BUF_LEN(mout);
- ch->chunk_length = htons(sizeof(struct sctp_chunkhdr));
- }
-
- if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
- /* no mbuf's */
- sctp_m_freem(mout);
- return;
- }
-#ifdef INET
- if (iph_out != NULL) {
- sctp_route_t ro;
- int ret;
-
- /* zap the stack pointer to the route */
- bzero(&ro, sizeof ro);
- if (port) {
- udp->uh_ulen = htons(len - sizeof(struct ip));
- if (V_udp_cksum) {
- udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
- } else {
- udp->uh_sum = 0;
- }
- }
- /* set IPv4 length */
- iph_out->ip_len = len;
- /* out it goes */
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, len);
-#endif
- SCTP_ATTACH_CHAIN(o_pak, mout, len);
- if (port) {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- sh_out->checksum = sctp_calculate_cksum(mout, iphlen_out);
- SCTP_STAT_INCR(sctps_sendswcrc);
-#endif
- if (V_udp_cksum) {
- SCTP_ENABLE_UDP_CSUM(o_pak);
- }
- } else {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- mout->m_pkthdr.csum_flags = CSUM_SCTP;
- mout->m_pkthdr.csum_data = 0;
- SCTP_STAT_INCR(sctps_sendhwcrc);
-#endif
- }
- SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
-#endif
-#ifdef INET6
- if (ip6_out != NULL) {
- struct route_in6 ro;
- int ret;
- struct ifnet *ifp = NULL;
-
- /* zap the stack pointer to the route */
- bzero(&ro, sizeof(ro));
- if (port) {
- udp->uh_ulen = htons(len - sizeof(struct ip6_hdr));
- }
- ip6_out->ip6_plen = len - sizeof(*ip6_out);
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(mout, len);
-#endif
- SCTP_ATTACH_CHAIN(o_pak, mout, len);
- if (port) {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- sh_out->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
-#endif
- if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
- udp->uh_sum = 0xffff;
- }
- } else {
-#if defined(SCTP_WITH_NO_CSUM)
- SCTP_STAT_INCR(sctps_sendnocrc);
-#else
- mout->m_pkthdr.csum_flags = CSUM_SCTP;
- mout->m_pkthdr.csum_data = 0;
- SCTP_STAT_INCR(sctps_sendhwcrc);
-#endif
- }
- SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
-
- /* Free the route if we got one back */
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- }
-#endif
- SCTP_STAT_INCR(sctps_sendpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause,
+ use_mflowid, mflowid,
+ vrf_id, port);
+ return;
}
static struct mbuf *
@@ -12750,7 +12174,7 @@
union sctp_sockstore *raddr = (union sctp_sockstore *)addr;
switch (raddr->sa.sa_family) {
-#if defined(INET)
+#ifdef INET
case AF_INET:
if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) {
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
@@ -12760,7 +12184,7 @@
port = raddr->sin.sin_port;
break;
#endif
-#if defined(INET6)
+#ifdef INET6
case AF_INET6:
if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
@@ -13138,9 +12562,7 @@
atomic_add_int(&stcb->asoc.refcnt, -1);
free_cnt_applied = 0;
/* release this lock, otherwise we hang on ourselves */
- sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_RESPONSE_TO_USER_REQ,
- mm, SCTP_SO_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, mm, SCTP_SO_LOCKED);
/* now relock the stcb so everything is sane */
hold_tcblock = 0;
stcb = NULL;
@@ -13617,8 +13039,7 @@
dataless_eof:
/* EOF thing ? */
if ((srcv->sinfo_flags & SCTP_EOF) &&
- (got_all_of_the_send == 1) &&
- (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ (got_all_of_the_send == 1)) {
int cnt;
SCTP_STAT_INCR(sctps_sends_with_eof);
@@ -13695,7 +13116,6 @@
free_cnt_applied = 0;
}
sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_RESPONSE_TO_USER_REQ,
NULL, SCTP_SO_LOCKED);
/*
* now relock the stcb so everything
@@ -13840,7 +13260,7 @@
if (inp) {
sctp_validate_no_locks(inp);
} else {
- printf("Warning - inp is NULL so cant validate locks\n");
+ SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n");
}
#endif
if (top) {
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_output.h
--- a/head/sys/netinet/sctp_output.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_output.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_output.h,v 1.14 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_output.h 237715 2012-06-28 16:01:08Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_output.h 233660 2012-03-29 13:36:53Z rrs $");
-
-#ifndef __sctp_output_h__
-#define __sctp_output_h__
+#ifndef _NETINET_SCTP_OUTPUT_H_
+#define _NETINET_SCTP_OUTPUT_H_
#include <netinet/sctp_header.h>
@@ -85,8 +83,11 @@
);
void
-sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
- struct mbuf *, int, int, struct sctphdr *, struct sctp_init_chunk *,
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
+ int, int,
+ struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct sctp_init_chunk *,
+ uint8_t, uint32_t,
uint32_t, uint16_t, int);
struct mbuf *
@@ -117,7 +118,9 @@
void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
void
-sctp_send_shutdown_complete2(struct mbuf *, struct sctphdr *,
+sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
+ struct sctphdr *,
+ uint8_t, uint32_t,
uint32_t, uint16_t);
void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
@@ -162,7 +165,7 @@
void
sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
- int, int);
+ int, int, int);
@@ -203,14 +206,20 @@
uint16_t adding_i, uint8_t from_peer);
void
-sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
- struct mbuf *, uint32_t, uint16_t);
+sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, uint32_t, struct mbuf *,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
-void sctp_send_operr_to(struct mbuf *, int, struct mbuf *, uint32_t, uint32_t, uint16_t);
+void
+sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, uint32_t, struct mbuf *,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
#endif /* _KERNEL || __Userspace__ */
-#if defined(_KERNEL) || defined (__Userspace__)
+#if defined(_KERNEL) || defined(__Userspace__)
int
sctp_sosend(struct socket *so,
struct sockaddr *addr,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_pcb.c
--- a/head/sys/netinet/sctp_pcb.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_pcb.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_pcb.c 228907 2011-12-27 10:16:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_pcb.c 238550 2012-07-17 13:03:47Z tuexen $");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
@@ -1205,11 +1203,18 @@
uint16_t rport;
inp = *inp_p;
- if (remote->sa_family == AF_INET) {
+ switch (remote->sa_family) {
+#ifdef INET
+ case AF_INET:
rport = (((struct sockaddr_in *)remote)->sin_port);
- } else if (remote->sa_family == AF_INET6) {
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
rport = (((struct sockaddr_in6 *)remote)->sin6_port);
- } else {
+ break;
+#endif
+ default:
return (NULL);
}
if (locked_tcb) {
@@ -1893,7 +1898,7 @@
* need to change the *to to some other struct like a mbuf...
*/
struct sctp_tcb *
-sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+sctp_findassociation_addr_sa(struct sockaddr *from, struct sockaddr *to,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
uint32_t vrf_id)
{
@@ -1948,7 +1953,7 @@
static struct sctp_tcb *
sctp_findassociation_special_addr(struct mbuf *m, int offset,
struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
- struct sockaddr *dest)
+ struct sockaddr *dst)
{
struct sctp_paramhdr *phdr, parm_buf;
struct sctp_tcb *retval;
@@ -2002,7 +2007,7 @@
memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
/* look it up */
retval = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&sin4, netp, dest, NULL);
+ (struct sockaddr *)&sin4, netp, dst, NULL);
if (retval != NULL) {
return (retval);
}
@@ -2023,7 +2028,7 @@
memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
/* look it up */
retval = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&sin6, netp, dest, NULL);
+ (struct sockaddr *)&sin6, netp, dst, NULL);
if (retval != NULL) {
return (retval);
}
@@ -2146,105 +2151,17 @@
*/
struct sctp_tcb *
sctp_findassociation_addr(struct mbuf *m, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_chunkhdr *ch,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
int find_tcp_pool;
- struct ip *iph;
struct sctp_tcb *retval;
- struct sockaddr_storage to_store, from_store;
- struct sockaddr *to = (struct sockaddr *)&to_store;
- struct sockaddr *from = (struct sockaddr *)&from_store;
struct sctp_inpcb *inp;
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *from4;
-
- from4 = (struct sockaddr_in *)&from_store;
- bzero(from4, sizeof(*from4));
- from4->sin_family = AF_INET;
- from4->sin_len = sizeof(struct sockaddr_in);
- from4->sin_addr.s_addr = iph->ip_src.s_addr;
- from4->sin_port = sh->src_port;
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *from6;
-
- ip6 = mtod(m, struct ip6_hdr *);
- from6 = (struct sockaddr_in6 *)&from_store;
- bzero(from6, sizeof(*from6));
- from6->sin6_family = AF_INET6;
- from6->sin6_len = sizeof(struct sockaddr_in6);
- from6->sin6_addr = ip6->ip6_src;
- from6->sin6_port = sh->src_port;
- /* Get the scopes in properly to the sin6 addr's */
- /* we probably don't need these operations */
- (void)sa6_recoverscope(from6);
- sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone));
- break;
- }
-#endif
- default:
- /* Currently not supported. */
- return (NULL);
- }
-
-
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *to4;
-
- to4 = (struct sockaddr_in *)&to_store;
- bzero(to4, sizeof(*to4));
- to4->sin_family = AF_INET;
- to4->sin_len = sizeof(struct sockaddr_in);
- to4->sin_addr.s_addr = iph->ip_dst.s_addr;
- to4->sin_port = sh->dest_port;
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *to6;
-
- ip6 = mtod(m, struct ip6_hdr *);
- to6 = (struct sockaddr_in6 *)&to_store;
- bzero(to6, sizeof(*to6));
- to6->sin6_family = AF_INET6;
- to6->sin6_len = sizeof(struct sockaddr_in6);
- to6->sin6_addr = ip6->ip6_dst;
- to6->sin6_port = sh->dest_port;
- /* Get the scopes in properly to the sin6 addr's */
- /* we probably don't need these operations */
- (void)sa6_recoverscope(to6);
- sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone));
- break;
- }
-#endif
- default:
- /* TSNH */
- break;
- }
if (sh->v_tag) {
/* we only go down this path if vtag is non-zero */
- retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag),
+ retval = sctp_findassoc_by_vtag(src, dst, ntohl(sh->v_tag),
inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0);
if (retval) {
return (retval);
@@ -2259,11 +2176,11 @@
find_tcp_pool = 1;
}
if (inp_p) {
- retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
+ retval = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
find_tcp_pool, vrf_id);
inp = *inp_p;
} else {
- retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
+ retval = sctp_findassociation_addr_sa(src, dst, &inp, netp,
find_tcp_pool, vrf_id);
}
SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
@@ -2286,7 +2203,7 @@
return (NULL);
}
retval = sctp_findassociation_special_addr(m,
- offset, sh, &inp, netp, to);
+ offset, sh, &inp, netp, dst);
if (inp_p != NULL) {
*inp_p = inp;
}
@@ -2302,12 +2219,11 @@
*/
struct sctp_tcb *
sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
- struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+ struct sockaddr *dst, struct sctphdr *sh,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
struct sctp_tcb *stcb;
- struct sockaddr_storage local_store, remote_store;
- struct sockaddr *to;
- struct ip *iph;
+ struct sockaddr_storage remote_store;
struct sctp_paramhdr parm_buf, *phdr;
int ptype;
int zero_address = 0;
@@ -2317,42 +2233,11 @@
#endif
#ifdef INET6
- struct ip6_hdr *ip6;
struct sockaddr_in6 *sin6;
#endif
- memset(&local_store, 0, sizeof(local_store));
memset(&remote_store, 0, sizeof(remote_store));
- to = (struct sockaddr *)&local_store;
- /* First get the destination address setup too. */
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- /* its IPv4 */
- sin = (struct sockaddr_in *)&local_store;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_port = sh->dest_port;
- sin->sin_addr.s_addr = iph->ip_dst.s_addr;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- /* its IPv6 */
- ip6 = mtod(m, struct ip6_hdr *);
- sin6 = (struct sockaddr_in6 *)&local_store;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(*sin6);
- sin6->sin6_port = sh->dest_port;
- sin6->sin6_addr = ip6->ip6_dst;
- break;
-#endif
- default:
- return NULL;
- }
-
phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
&parm_buf, sizeof(struct sctp_paramhdr));
if (phdr == NULL) {
@@ -2423,16 +2308,16 @@
}
if (zero_address) {
- stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p,
+ stcb = sctp_findassoc_by_vtag(NULL, dst, ntohl(sh->v_tag), inp_p,
netp, sh->src_port, sh->dest_port, 1, vrf_id, 0);
/*
- * printf("findassociation_ep_asconf: zero lookup address
- * finds stcb 0x%x\n", (uint32_t)stcb);
+ * SCTP_PRINTF("findassociation_ep_asconf: zero lookup
+ * address finds stcb 0x%x\n", (uint32_t)stcb);
*/
} else {
stcb = sctp_findassociation_ep_addr(inp_p,
(struct sockaddr *)&remote_store, netp,
- to, NULL);
+ dst, NULL);
}
return (stcb);
}
@@ -3131,6 +3016,7 @@
memset(&store_sa, 0, sizeof(store_sa));
switch (addr->sa_family) {
+#ifdef INET
case AF_INET:
{
struct sockaddr_in *sin;
@@ -3140,6 +3026,8 @@
sin->sin_port = 0;
break;
}
+#endif
+#ifdef INET6
case AF_INET6:
{
struct sockaddr_in6 *sin6;
@@ -3149,6 +3037,7 @@
sin6->sin6_port = 0;
break;
}
+#endif
default:
break;
}
@@ -3426,9 +3315,6 @@
*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
}
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("inpcb_free does an abort");
-#endif
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
@@ -3520,10 +3406,6 @@
*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
}
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("inpcb_free does an abort");
-#endif
-
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
@@ -3604,9 +3486,6 @@
}
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("inpcb_free does an abort");
-#endif
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
} else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
@@ -4996,30 +4875,25 @@
/* now clean up any chunks here */
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
TAILQ_REMOVE(&outs->outqueue, sp, next);
+ sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
if (so) {
/* Still an open socket - report */
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_UNSENT,
- (void *)sp, SCTP_SO_LOCKED);
+ 0, (void *)sp, SCTP_SO_LOCKED);
}
if (sp->data) {
sctp_m_freem(sp->data);
sp->data = NULL;
sp->tail_mbuf = NULL;
+ sp->length = 0;
}
}
if (sp->net) {
sctp_free_remote_addr(sp->net);
sp->net = NULL;
}
- sctp_free_spbufspace(stcb, asoc, sp);
- if (sp->holds_key_ref)
- sctp_auth_key_release(stcb, sp->auth_keyid, SCTP_SO_LOCKED);
- /* Free the zone stuff */
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), sp);
- SCTP_DECR_STRMOQ_COUNT();
- /* sa_ignore FREED_MEMORY */
+ sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED);
}
}
/* sa_ignore FREED_MEMORY */
@@ -5061,8 +4935,8 @@
if (chk->data) {
if (so) {
/* Still a socket? */
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
+ 0, chk, SCTP_SO_LOCKED);
}
if (chk->data) {
sctp_m_freem(chk->data);
@@ -5085,8 +4959,8 @@
if (chk->data) {
if (so) {
/* Still a socket? */
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_SENT, chk, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb,
+ 0, chk, SCTP_SO_LOCKED);
}
if (chk->data) {
sctp_m_freem(chk->data);
@@ -5321,12 +5195,16 @@
}
/* NOTE: all "scope" checks are done when local addresses are added */
switch (destaddr->sa_family) {
+#ifdef INET6
case AF_INET6:
answer = inp->ip_inp.inp.inp_vflag & INP_IPV6;
break;
+#endif
+#ifdef INET
case AF_INET:
answer = inp->ip_inp.inp.inp_vflag & INP_IPV4;
break;
+#endif
default:
/* invalid family, so it's unreachable */
answer = 0;
@@ -5417,7 +5295,7 @@
inp->ip_inp.inp.inp_vflag |= INP_IPV6;
break;
#endif
-#ifdef INET6
+#ifdef INET
case AF_INET:
inp->ip_inp.inp.inp_vflag |= INP_IPV4;
break;
@@ -5745,7 +5623,7 @@
if (v6 == 0) {
sctp_input_with_port(m, off, 0);
} else {
- printf("V6 not yet supported\n");
+ SCTP_PRINTF("V6 not yet supported\n");
sctp_m_freem(m);
}
CURVNET_RESTORE();
@@ -6096,7 +5974,8 @@
int
sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
- int offset, int limit, struct sctphdr *sh,
+ int offset, int limit,
+ struct sockaddr *src, struct sockaddr *dst,
struct sockaddr *altsa)
{
/*
@@ -6108,13 +5987,10 @@
*/
struct sctp_inpcb *inp;
struct sctp_nets *net, *nnet, *net_tmp;
- struct ip *iph;
struct sctp_paramhdr *phdr, parm_buf;
struct sctp_tcb *stcb_tmp;
uint16_t ptype, plen;
struct sockaddr *sa;
- struct sockaddr_storage dest_store;
- struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
struct sctp_auth_random *p_random = NULL;
uint16_t random_len = 0;
@@ -6153,65 +6029,10 @@
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = stcb->rport;
#endif
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *sin_2;
-
- sin_2 = (struct sockaddr_in *)(local_sa);
- memset(sin_2, 0, sizeof(sin));
- sin_2->sin_family = AF_INET;
- sin_2->sin_len = sizeof(sin);
- sin_2->sin_port = sh->dest_port;
- sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
- if (altsa) {
- /*
- * For cookies we use the src address NOT
- * from the packet but from the original
- * INIT.
- */
- sa = altsa;
- } else {
- sin.sin_addr = iph->ip_src;
- sa = (struct sockaddr *)&sin;
- }
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *sin6_2;
-
- ip6 = mtod(m, struct ip6_hdr *);
- sin6_2 = (struct sockaddr_in6 *)(local_sa);
- memset(sin6_2, 0, sizeof(sin6));
- sin6_2->sin6_family = AF_INET6;
- sin6_2->sin6_len = sizeof(struct sockaddr_in6);
- sin6_2->sin6_port = sh->dest_port;
- sin6_2->sin6_addr = ip6->ip6_dst;
- if (altsa) {
- /*
- * For cookies we use the src address NOT
- * from the packet but from the original
- * INIT.
- */
- sa = altsa;
- } else {
- sin6.sin6_addr = ip6->ip6_src;
- sa = (struct sockaddr *)&sin6;
- }
- break;
- }
-#endif
- default:
- return (-1);
- break;
+ if (altsa) {
+ sa = altsa;
+ } else {
+ sa = src;
}
/* Turn off ECN until we get through all params */
ecn_allowed = 0;
@@ -6222,7 +6043,7 @@
/* does the source address already exist? if so skip it */
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
- stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
@@ -6276,8 +6097,8 @@
ptype = ntohs(phdr->param_type);
plen = ntohs(phdr->param_length);
/*
- * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype,
- * (int)plen);
+ * SCTP_PRINTF("ptype => %0x, plen => %d\n",
+ * (uint32_t)ptype, (int)plen);
*/
if (offset + plen > limit) {
break;
@@ -6312,7 +6133,7 @@
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
- local_sa, stcb);
+ dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
@@ -6357,7 +6178,7 @@
* abort this guy
*/
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, 1, NULL, 0);
+ stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
goto add_it_now;
}
SCTP_TCB_UNLOCK(stcb_tmp);
@@ -6402,7 +6223,7 @@
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
- local_sa, stcb);
+ dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if (stcb_tmp == NULL &&
(inp == stcb->sctp_ep || inp == NULL)) {
@@ -6448,7 +6269,7 @@
* abort this guy
*/
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, 1, NULL, 0);
+ stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
goto add_it_now6;
}
SCTP_TCB_UNLOCK(stcb_tmp);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_pcb.h
--- a/head/sys/netinet/sctp_pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,8 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
@@ -29,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_pcb.h,v 1.21 2005/07/16 01:18:47 suz Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_pcb.h 237715 2012-06-28 16:01:08Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_pcb.h 233660 2012-03-29 13:36:53Z rrs $");
-
-#ifndef __sctp_pcb_h__
-#define __sctp_pcb_h__
+#ifndef _NETINET_SCTP_PCB_H_
+#define _NETINET_SCTP_PCB_H_
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -529,6 +528,7 @@
struct sctp_tcb *
sctp_findassociation_addr(struct mbuf *, int,
+ struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
struct sctp_nets **, uint32_t vrf_id);
@@ -559,7 +559,7 @@
sctp_assoc_t, int);
struct sctp_tcb *
-sctp_findassociation_ep_asconf(struct mbuf *, int,
+sctp_findassociation_ep_asconf(struct mbuf *, int, struct sockaddr *,
struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id);
int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
@@ -604,8 +604,8 @@
void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
int
-sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int,
- int, struct sctphdr *, struct sockaddr *);
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sockaddr *, struct sockaddr *, struct sockaddr *);
int
sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_peeloff.c
--- a/head/sys/netinet/sctp_peeloff.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_peeloff.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,11 +30,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_peeloff.c 235828 2012-05-23 11:26:28Z tuexen $");
-/* $KAME: sctp_peeloff.c,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_peeloff.c 233660 2012-03-29 13:36:53Z rrs $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctputil.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_peeloff.h
--- a/head/sys/netinet/sctp_peeloff.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_peeloff.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_peeloff.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_peeloff.h 235828 2012-05-23 11:26:28Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_peeloff.h 228653 2011-12-17 19:21:40Z tuexen $");
-
-#ifndef __sctp_peeloff_h__
-#define __sctp_peeloff_h__
+#ifndef _NETINET_SCTP_PEELOFF_H_
+#define _NETINET_SCTP_PEELOFF_H_
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_ss_functions.c
--- a/head/sys/netinet/sctp_ss_functions.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_ss_functions.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
- * Copyright (c) 2010-2011, by Michael Tuexen. All rights reserved.
- * Copyright (c) 2010-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2010-2011, by Robin Seggelmann. All rights reserved.
+ * Copyright (c) 2010-2012, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2010-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2010-2012, by Robin Seggelmann. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_ss_functions.c 228907 2011-12-27 10:16:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_ss_functions.c 235828 2012-05-23 11:26:28Z tuexen $");
#include <netinet/sctp_pcb.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_structs.h
--- a/head/sys/netinet/sctp_structs.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_structs.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,13 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_structs.h,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_structs.h 235828 2012-05-23 11:26:28Z tuexen $");
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_structs.h 233660 2012-03-29 13:36:53Z rrs $");
-
-#ifndef __sctp_structs_h__
-#define __sctp_structs_h__
+#ifndef _NETINET_SCTP_STRUCTS_H_
+#define _NETINET_SCTP_STRUCTS_H_
#include <netinet/sctp_os.h>
#include <netinet/sctp_header.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_sysctl.c
--- a/head/sys/netinet/sctp_sysctl.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_sysctl.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_sysctl.c 229805 2012-01-08 09:56:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_sysctl.c 237565 2012-06-25 17:15:09Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -55,9 +55,6 @@
SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT;
-#if !defined(SCTP_WITH_NO_CSUM)
- SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) = SCTPCTL_LOOPBACK_NOCSUM_DEFAULT;
-#endif
SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
SCTP_BASE_SYSCTL(sctp_fr_max_burst_default) = SCTPCTL_FRMAXBURST_DEFAULT;
@@ -604,9 +601,6 @@
RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
-#if !defined(SCTP_WITH_NO_CSUM)
- RANGECHK(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), SCTPCTL_LOOPBACK_NOCSUM_MIN, SCTPCTL_LOOPBACK_NOCSUM_MAX);
-#endif
RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), SCTPCTL_FRMAXBURST_MIN, SCTPCTL_FRMAXBURST_MAX);
@@ -671,7 +665,7 @@
#ifdef SCTP_DEBUG
RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX);
#endif
}
@@ -870,11 +864,6 @@
&SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU",
SCTPCTL_STRICT_SACKS_DESC);
-#if !defined(SCTP_WITH_NO_CSUM)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), 0, sysctl_sctp_check, "IU",
- SCTPCTL_LOOPBACK_NOCSUM_DESC);
-#endif
SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_UINT | CTLFLAG_RW,
&SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU",
@@ -1136,7 +1125,7 @@
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_UINT | CTLFLAG_RW,
&SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU",
SCTPCTL_OUTPUT_UNLOCKED_DESC);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_sysctl.h
--- a/head/sys/netinet/sctp_sysctl.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_sysctl.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,10 +31,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_sysctl.h 229805 2012-01-08 09:56:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_sysctl.h 237565 2012-06-25 17:15:09Z tuexen $");
-#ifndef __sctp_sysctl_h__
-#define __sctp_sysctl_h__
+#ifndef _NETINET_SCTP_SYSCTL_H_
+#define _NETINET_SCTP_SYSCTL_H_
#include <netinet/sctp_os.h>
#include <netinet/sctp_constants.h>
@@ -47,9 +47,6 @@
uint32_t sctp_ecn_enable;
uint32_t sctp_fr_max_burst_default;
uint32_t sctp_strict_sacks;
-#if !defined(SCTP_WITH_NO_CSUM)
- uint32_t sctp_no_csum_on_loopback;
-#endif
uint32_t sctp_peer_chunk_oh;
uint32_t sctp_max_burst_default;
uint32_t sctp_max_chunks_on_queue;
@@ -377,7 +374,7 @@
#define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)"
#define SCTPCTL_ABC_L_VAR_MIN 0
#define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF
-#define SCTPCTL_ABC_L_VAR_DEFAULT 1
+#define SCTPCTL_ABC_L_VAR_DEFAULT 2
/* max_chained_mbufs: Default max number of small mbufs on a chain */
#define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain"
@@ -534,7 +531,7 @@
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
#define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP."
#define SCTPCTL_OUTPUT_UNLOCKED_MIN 0
#define SCTPCTL_OUTPUT_UNLOCKED_MAX 1
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_timer.c
--- a/head/sys/netinet/sctp_timer.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_timer.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.c 234297 2012-04-14 21:01:44Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.c 235828 2012-05-23 11:26:28Z tuexen $");
#define _IP_VHL
#include <netinet/sctp_os.h>
@@ -101,8 +99,7 @@
net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
net->dest_state &= ~SCTP_ADDR_PF;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
- stcb,
- SCTP_FAILED_THRESHOLD,
+ stcb, 0,
(void *)net, SCTP_SO_NOT_LOCKED);
}
} else if ((net->pf_threshold < net->failure_threshold) &&
@@ -167,7 +164,7 @@
*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
}
inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
- sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
return (1);
}
return (0);
@@ -615,7 +612,7 @@
if (chk->data) {
(void)sctp_release_pr_sctp_chunk(stcb,
chk,
- (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ 1,
SCTP_SO_NOT_LOCKED);
cnt_abandoned++;
}
@@ -628,7 +625,7 @@
if (chk->data) {
(void)sctp_release_pr_sctp_chunk(stcb,
chk,
- (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ 1,
SCTP_SO_NOT_LOCKED);
cnt_abandoned++;
}
@@ -1066,8 +1063,7 @@
*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
}
inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
- sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
- oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
} else {
#ifdef INVARIANTS
panic("Cookie timer expires in wrong state?");
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_timer.h
--- a/head/sys/netinet/sctp_timer.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_timer.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,12 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_timer.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.h 228653 2011-12-17 19:21:40Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.h 235828 2012-05-23 11:26:28Z tuexen $");
-#ifndef __sctp_timer_h__
-#define __sctp_timer_h__
+#ifndef _NETINET_SCTP_TIMER_H_
+#define _NETINET_SCTP_TIMER_H_
#if defined(_KERNEL) || defined(__Userspace__)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_uio.h
--- a/head/sys/netinet/sctp_uio.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_uio.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,12 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_uio.h 233660 2012-03-29 13:36:53Z rrs $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_uio.h 238501 2012-07-15 20:16:17Z tuexen $");
-#ifndef __sctp_uio_h__
-#define __sctp_uio_h__
+#ifndef _NETINET_SCTP_UIO_H_
+#define _NETINET_SCTP_UIO_H_
#if ! defined(_KERNEL)
@@ -169,7 +168,7 @@
};
struct sctp_authinfo {
- uint16_t auth_keyid;
+ uint16_t auth_keynumber;
};
struct sctp_rcvinfo {
@@ -296,16 +295,23 @@
uint16_t sac_outbound_streams;
uint16_t sac_inbound_streams;
sctp_assoc_t sac_assoc_id;
+ uint8_t sac_info[];
};
/* sac_state values */
-#define SCTP_COMM_UP 0x0001
-#define SCTP_COMM_LOST 0x0002
-#define SCTP_RESTART 0x0003
-#define SCTP_SHUTDOWN_COMP 0x0004
-#define SCTP_CANT_STR_ASSOC 0x0005
+#define SCTP_COMM_UP 0x0001
+#define SCTP_COMM_LOST 0x0002
+#define SCTP_RESTART 0x0003
+#define SCTP_SHUTDOWN_COMP 0x0004
+#define SCTP_CANT_STR_ASSOC 0x0005
-
+/* sac_info values */
+#define SCTP_ASSOC_SUPPORTS_PR 0x01
+#define SCTP_ASSOC_SUPPORTS_AUTH 0x02
+#define SCTP_ASSOC_SUPPORTS_ASCONF 0x03
+#define SCTP_ASSOC_SUPPORTS_MULTIBUF 0x04
+#define SCTP_ASSOC_SUPPORTS_RE_CONFIG 0x05
+#define SCTP_ASSOC_SUPPORTS_MAX 0x05
/*
* Address event
*/
@@ -343,7 +349,7 @@
uint8_t sre_data[4];
};
-/* data send failure event */
+/* data send failure event (deprecated) */
struct sctp_send_failed {
uint16_t ssf_type;
uint16_t ssf_flags;
@@ -354,6 +360,17 @@
uint8_t ssf_data[];
};
+/* data send failure event (not deprecated) */
+struct sctp_send_failed_event {
+ uint16_t ssfe_type;
+ uint16_t ssfe_flags;
+ uint32_t ssfe_length;
+ uint32_t ssfe_error;
+ struct sctp_sndinfo ssfe_info;
+ sctp_assoc_t ssfe_assoc_id;
+ uint8_t ssfe_data[];
+};
+
/* flag that indicates state of data */
#define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */
#define SCTP_DATA_SENT 0x0002 /* on wire at failure */
@@ -424,7 +441,8 @@
};
/* indication values */
-#define SCTP_AUTH_NEWKEY 0x0001
+#define SCTP_AUTH_NEW_KEY 0x0001
+#define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY
#define SCTP_AUTH_NO_AUTH 0x0002
#define SCTP_AUTH_FREE_KEY 0x0003
@@ -449,9 +467,10 @@
};
/* flags in stream_reset_event (strreset_flags) */
-#define SCTP_STREAM_RESET_DENIED 0x0004 /* SCTP_STRRESET_FAILED */
-#define SCTP_STREAM_RESET_FAILED 0x0008 /* SCTP_STRRESET_FAILED */
-#define SCTP_STREAM_CHANGED_DENIED 0x0010
+#define SCTP_STREAM_RESET_INCOMING_SSN 0x0001
+#define SCTP_STREAM_RESET_OUTGOING_SSN 0x0002
+#define SCTP_STREAM_RESET_DENIED 0x0004
+#define SCTP_STREAM_RESET_FAILED 0x0008
/*
* Assoc reset event - subscribe to SCTP_ASSOC_RESET_EVENT
@@ -504,29 +523,29 @@
struct sctp_pdapi_event sn_pdapi_event;
struct sctp_authkey_event sn_auth_event;
struct sctp_sender_dry_event sn_sender_dry_event;
+ struct sctp_send_failed_event sn_send_failed_event;
struct sctp_stream_reset_event sn_strreset_event;
struct sctp_assoc_reset_event sn_assocreset_event;
struct sctp_stream_change_event sn_strchange_event;
-
};
/* notification types */
-#define SCTP_ASSOC_CHANGE 0x0001
-#define SCTP_PEER_ADDR_CHANGE 0x0002
-#define SCTP_REMOTE_ERROR 0x0003
-#define SCTP_SEND_FAILED 0x0004
-#define SCTP_SHUTDOWN_EVENT 0x0005
-#define SCTP_ADAPTATION_INDICATION 0x0006
+#define SCTP_ASSOC_CHANGE 0x0001
+#define SCTP_PEER_ADDR_CHANGE 0x0002
+#define SCTP_REMOTE_ERROR 0x0003
+#define SCTP_SEND_FAILED 0x0004
+#define SCTP_SHUTDOWN_EVENT 0x0005
+#define SCTP_ADAPTATION_INDICATION 0x0006
/* same as above */
-#define SCTP_ADAPTION_INDICATION 0x0006
-#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007
-#define SCTP_AUTHENTICATION_EVENT 0x0008
-#define SCTP_STREAM_RESET_EVENT 0x0009
-#define SCTP_SENDER_DRY_EVENT 0x000a
-#define SCTP_NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */
-#define SCTP_ASSOC_RESET_EVENT 0x000c
-#define SCTP_STREAM_CHANGE_EVENT 0x000d
-
+#define SCTP_ADAPTION_INDICATION 0x0006
+#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007
+#define SCTP_AUTHENTICATION_EVENT 0x0008
+#define SCTP_STREAM_RESET_EVENT 0x0009
+#define SCTP_SENDER_DRY_EVENT 0x000a
+#define SCTP_NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */
+#define SCTP_ASSOC_RESET_EVENT 0x000c
+#define SCTP_STREAM_CHANGE_EVENT 0x000d
+#define SCTP_SEND_FAILED_EVENT 0x000e
/*
* socket option structs
*/
@@ -605,13 +624,6 @@
struct sockaddr addr[1];
};
-struct sctp_setstrm_timeout {
- sctp_assoc_t ssto_assoc_id;
- uint32_t ssto_timeout;
- uint32_t ssto_streamid_start;
- uint32_t ssto_streamid_end;
-};
-
struct sctp_status {
sctp_assoc_t sstat_assoc_id;
int32_t sstat_state;
@@ -664,6 +676,7 @@
/* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */
struct sctp_authchunks {
sctp_assoc_t gauth_assoc_id;
+ uint32_t gauth_number_of_chunks;
uint8_t gauth_chunks[];
};
@@ -1111,10 +1124,10 @@
#define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x)
union sctp_sockstore {
-#if defined(INET) || !defined(_KERNEL)
+#if defined(INET)
struct sockaddr_in sin;
#endif
-#if defined(INET6) || !defined(_KERNEL)
+#if defined(INET6)
struct sockaddr_in6 sin6;
#endif
struct sockaddr sa;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_usrreq.c
--- a/head/sys/netinet/sctp_usrreq.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_usrreq.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,16 +30,15 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_usrreq.c,v 1.48 2005/03/07 23:26:08 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_usrreq.c 234464 2012-04-19 15:30:15Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_usrreq.c 238501 2012-07-15 20:16:17Z tuexen $");
+
#include <netinet/sctp_os.h>
#include <sys/proc.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
-#if defined(INET6)
+#ifdef INET6
#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
@@ -79,7 +78,6 @@
* now I will just copy.
*/
SCTP_BASE_SYSCTL(sctp_recvspace) = SCTP_BASE_SYSCTL(sctp_sendspace);
-
SCTP_BASE_VAR(first_time) = 0;
SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
sctp_pcb_init();
@@ -88,8 +86,6 @@
SCTP_BASE_VAR(packet_log_end) = 0;
bzero(&SCTP_BASE_VAR(packet_log_buffer), SCTP_PACKET_LOG_SIZE);
#endif
-
-
}
void
@@ -228,15 +224,13 @@
struct sctp_tcb *stcb,
struct sctp_nets *net)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
+ struct icmp *icmph;
+
/* protection */
- int reason;
- struct icmp *icmph;
-
-
if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
(sh == NULL) || (to == NULL)) {
if (stcb)
@@ -275,7 +269,7 @@
net->dest_state &= ~SCTP_ADDR_REACHABLE;
net->dest_state &= ~SCTP_ADDR_PF;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
- stcb, SCTP_FAILED_THRESHOLD,
+ stcb, 0,
(void *)net, SCTP_SO_NOT_LOCKED);
}
SCTP_TCB_UNLOCK(stcb);
@@ -288,9 +282,8 @@
* now is dead. In either case treat it like a OOTB abort
* with no TCB
*/
- reason = SCTP_PEER_FAULTY;
- sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -299,7 +292,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
#endif
@@ -352,8 +345,8 @@
* 'from' holds our local endpoint address. Thus we reverse
* the to and the from in the lookup.
*/
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
- (struct sockaddr *)&to,
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to,
+ (struct sockaddr *)&from,
&inp, &net, 1, vrf_id);
if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
if (cmd != PRC_MSGSIZE) {
@@ -404,8 +397,8 @@
if (error)
return (error);
- stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
- sintosa(&addrs[1]),
+ stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]),
+ sintosa(&addrs[0]),
&inp, &net, 1, vrf_id);
if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
if ((inp != NULL) && (stcb == NULL)) {
@@ -783,9 +776,6 @@
ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
ph->param_length = htons(SCTP_BUF_LEN(err));
}
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("disconnect does an abort");
-#endif
sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
}
@@ -891,10 +881,6 @@
ippp = (uint32_t *) (ph + 1);
*ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
}
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("disconnect does an abort");
-#endif
-
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -1106,12 +1092,8 @@
ippp = (uint32_t *) (ph + 1);
*ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
}
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("shutdown does an abort");
-#endif
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
sctp_abort_an_association(stcb->sctp_ep, stcb,
- SCTP_RESPONSE_TO_USER_REQ,
op_err, SCTP_SO_LOCKED);
goto skip_unlock;
} else {
@@ -2882,6 +2864,7 @@
} else {
/* copy in the chunks */
(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ sac->gauth_number_of_chunks = (uint32_t) size;
*optsize = sizeof(struct sctp_authchunks) + size;
}
SCTP_TCB_UNLOCK(stcb);
@@ -2900,6 +2883,7 @@
} else {
/* copy in the chunks */
(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ sac->gauth_number_of_chunks = (uint32_t) size;
*optsize = sizeof(struct sctp_authchunks) + size;
}
SCTP_INP_RUNLOCK(inp);
@@ -2930,6 +2914,7 @@
} else {
/* copy in the chunks */
(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ sac->gauth_number_of_chunks = (uint32_t) size;
*optsize = sizeof(struct sctp_authchunks) + size;
}
SCTP_TCB_UNLOCK(stcb);
@@ -2983,6 +2968,15 @@
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
error = ENOTSUP;
break;
+ case SCTP_ASSOC_RESET_EVENT:
+ event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT;
+ break;
+ case SCTP_STREAM_CHANGE_EVENT:
+ event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT;
+ break;
+ case SCTP_SEND_FAILED_EVENT:
+ event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT;
+ break;
default:
event_type = 0;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -3290,6 +3284,33 @@
}
break;
}
+ case SCTP_ENABLE_STREAM_RESET:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = (uint32_t) stcb->asoc.local_strreset_support;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = (uint32_t) inp->local_strreset_support;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
@@ -4090,7 +4111,6 @@
case SCTP_ENABLE_STREAM_RESET:
{
struct sctp_assoc_value *av;
- uint8_t set_value = 0;
SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
if (av->assoc_value & (~SCTP_ENABLE_VALUE_MASK)) {
@@ -4098,10 +4118,9 @@
error = EINVAL;
break;
}
- set_value = av->assoc_value & SCTP_ENABLE_VALUE_MASK;
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- stcb->asoc.local_strreset_support = set_value;
+ stcb->asoc.local_strreset_support = (uint8_t) av->assoc_value;
SCTP_TCB_UNLOCK(stcb);
} else {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
@@ -4109,7 +4128,7 @@
(av->assoc_id == SCTP_FUTURE_ASSOC) ||
(av->assoc_id == SCTP_ALL_ASSOC)) {
SCTP_INP_WLOCK(inp);
- inp->local_strreset_support = set_value;
+ inp->local_strreset_support = (uint8_t) av->assoc_value;
SCTP_INP_WUNLOCK(inp);
}
if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
@@ -4117,7 +4136,7 @@
SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
SCTP_TCB_LOCK(stcb);
- stcb->asoc.local_strreset_support = set_value;
+ stcb->asoc.local_strreset_support = (uint8_t) av->assoc_value;
SCTP_TCB_UNLOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
@@ -4133,7 +4152,6 @@
SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_reset_streams, optsize);
SCTP_FIND_STCB(inp, stcb, strrst->srs_assoc_id);
-
if (stcb == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
@@ -4148,15 +4166,6 @@
SCTP_TCB_UNLOCK(stcb);
break;
}
- if (!(stcb->asoc.local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) {
- /*
- * User did not enable the operation.
- */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPERM);
- error = EPERM;
- SCTP_TCB_UNLOCK(stcb);
- break;
- }
if (stcb->asoc.stream_reset_outstanding) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
error = EALREADY;
@@ -4215,6 +4224,21 @@
error = ENOENT;
break;
}
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * Peer does not support the chunk type.
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if ((stradd->sas_outstrms == 0) &&
(stradd->sas_instrms == 0)) {
error = EINVAL;
@@ -4278,15 +4302,6 @@
SCTP_TCB_UNLOCK(stcb);
break;
}
- if (!(stcb->asoc.local_strreset_support & SCTP_ENABLE_RESET_ASSOC_REQ)) {
- /*
- * User did not enable the operation.
- */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPERM);
- error = EPERM;
- SCTP_TCB_UNLOCK(stcb);
- break;
- }
if (stcb->asoc.stream_reset_outstanding) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
error = EALREADY;
@@ -4810,12 +4825,12 @@
if (net->dest_state & SCTP_ADDR_REACHABLE) {
if (net->error_count > paddrp->spp_pathmaxrxt) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
if (net->error_count <= paddrp->spp_pathmaxrxt) {
net->dest_state |= SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
net->failure_threshold = paddrp->spp_pathmaxrxt;
@@ -4853,12 +4868,12 @@
if (net->dest_state & SCTP_ADDR_REACHABLE) {
if (net->error_count > paddrp->spp_pathmaxrxt) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
if (net->error_count <= paddrp->spp_pathmaxrxt) {
net->dest_state |= SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
net->failure_threshold = paddrp->spp_pathmaxrxt;
@@ -5243,7 +5258,6 @@
case SCTP_BINDX_ADD_ADDR:
{
struct sctp_getaddresses *addrs;
- size_t sz;
struct thread *td;
td = (struct thread *)p;
@@ -5251,8 +5265,7 @@
optsize);
#ifdef INET
if (addrs->addr->sa_family == AF_INET) {
- sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
- if (optsize < sz) {
+ if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5265,8 +5278,7 @@
#endif
#ifdef INET6
if (addrs->addr->sa_family == AF_INET6) {
- sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
- if (optsize < sz) {
+ if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5290,7 +5302,6 @@
case SCTP_BINDX_REM_ADDR:
{
struct sctp_getaddresses *addrs;
- size_t sz;
struct thread *td;
td = (struct thread *)p;
@@ -5298,8 +5309,7 @@
SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize);
#ifdef INET
if (addrs->addr->sa_family == AF_INET) {
- sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
- if (optsize < sz) {
+ if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5312,8 +5322,7 @@
#endif
#ifdef INET6
if (addrs->addr->sa_family == AF_INET6) {
- sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
- if (optsize < sz) {
+ if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5379,6 +5388,15 @@
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
error = ENOTSUP;
break;
+ case SCTP_ASSOC_RESET_EVENT:
+ event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT;
+ break;
+ case SCTP_STREAM_CHANGE_EVENT:
+ event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT;
+ break;
+ case SCTP_SEND_FAILED_EVENT:
+ event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT;
+ break;
default:
event_type = 0;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -5655,12 +5673,12 @@
if (net->dest_state & SCTP_ADDR_REACHABLE) {
if (net->failure_threshold > thlds->spt_pathmaxrxt) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
net->dest_state |= SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
net->failure_threshold = thlds->spt_pathmaxrxt;
@@ -5684,12 +5702,12 @@
if (net->dest_state & SCTP_ADDR_REACHABLE) {
if (net->failure_threshold > thlds->spt_pathmaxrxt) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
net->dest_state |= SCTP_ADDR_REACHABLE;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RESPONSE_TO_USER_REQ, net, SCTP_SO_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
net->failure_threshold = thlds->spt_pathmaxrxt;
@@ -5809,7 +5827,6 @@
{
void *optval = NULL;
size_t optsize = 0;
- struct sctp_inpcb *inp;
void *p;
int error = 0;
@@ -5819,7 +5836,7 @@
if (INP_CHECK_SOCKAF(so, AF_INET6))
error = ip6_ctloutput(so, sopt);
#endif /* INET6 */
-#if defined(INET) && defined (INET6)
+#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
@@ -5827,12 +5844,11 @@
#endif
return (error);
}
- inp = (struct sctp_inpcb *)so->so_pcb;
optsize = sopt->sopt_valsize;
if (optsize) {
SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
if (optval == NULL) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
return (ENOBUFS);
}
error = sooptcopyin(sopt, optval, optsize, optsize);
@@ -5847,7 +5863,7 @@
} else if (sopt->sopt_dir == SOPT_GET) {
error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p);
} else {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
if ((error == 0) && (optval != NULL)) {
@@ -6454,7 +6470,6 @@
return (0);
}
-#ifdef INET
struct pr_usrreqs sctp_usrreqs = {
.pru_abort = sctp_abort,
.pru_accept = sctp_accept,
@@ -6477,4 +6492,3 @@
};
#endif
-#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctp_var.h
--- a/head/sys/netinet/sctp_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctp_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctp_var.h,v 1.24 2005/03/06 16:04:19 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctp_var.h 234464 2012-04-19 15:30:15Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctp_var.h 235828 2012-05-23 11:26:28Z tuexen $");
#ifndef _NETINET_SCTP_VAR_H_
#define _NETINET_SCTP_VAR_H_
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctputil.c
--- a/head/sys/netinet/sctputil.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctputil.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,8 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $KAME: sctputil.c,v 1.37 2005/03/07 23:26:09 itojun Exp $ */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctputil.c 234539 2012-04-21 11:53:24Z tuexen $");
+__FBSDID("$FreeBSD: head/sys/netinet/sctputil.c 238550 2012-07-17 13:03:47Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
@@ -1127,7 +1125,7 @@
{
unsigned int i, limit;
- printf("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n",
+ SCTP_PRINTF("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n",
asoc->mapping_array_size,
asoc->mapping_array_base_tsn,
asoc->cumulative_tsn,
@@ -1138,23 +1136,23 @@
break;
}
}
- printf("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
+ SCTP_PRINTF("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
for (i = 0; i < limit; i++) {
- printf("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
+ SCTP_PRINTF("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
}
if (limit % 16)
- printf("\n");
+ SCTP_PRINTF("\n");
for (limit = asoc->mapping_array_size; limit > 1; limit--) {
if (asoc->nr_mapping_array[limit - 1]) {
break;
}
}
- printf("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
+ SCTP_PRINTF("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
for (i = 0; i < limit; i++) {
- printf("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
+ SCTP_PRINTF("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
}
if (limit % 16)
- printf("\n");
+ SCTP_PRINTF("\n");
}
int
@@ -1292,7 +1290,7 @@
goto no_stcb;
}
/* If we reach here huh? */
- printf("Unknown it ctl flag %x\n",
+ SCTP_PRINTF("Unknown it ctl flag %x\n",
sctp_it_ctl.iterator_flags);
sctp_it_ctl.iterator_flags = 0;
}
@@ -1414,7 +1412,7 @@
struct sctp_nets *net;
struct sctp_timer *tmr;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1725,8 +1723,7 @@
break;
}
SCTP_STAT_INCR(sctps_timoshutdownguard);
- sctp_abort_an_association(inp, stcb,
- SCTP_SHUTDOWN_GUARD_EXPIRES, NULL, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(inp, stcb, NULL, SCTP_SO_NOT_LOCKED);
/* no need to unlock on tcb its gone */
goto out_decr;
@@ -1780,7 +1777,7 @@
/* Can we free it yet? */
SCTP_INP_DECR_REF(inp);
sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1789,7 +1786,7 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
/*
@@ -2576,15 +2573,13 @@
/* find the last mbuf in chain and pad it */
struct mbuf *m_at;
- m_at = m;
if (last_mbuf) {
return (sctp_add_pad_tombuf(last_mbuf, padval));
} else {
- while (m_at) {
+ for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
if (SCTP_BUF_NEXT(m_at) == NULL) {
return (sctp_add_pad_tombuf(m_at, padval));
}
- m_at = SCTP_BUF_NEXT(m_at);
}
}
SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
@@ -2592,8 +2587,8 @@
}
static void
-sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb,
- uint32_t error, int so_locked
+sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
+ uint16_t error, struct sctp_abort_chunk *abort, uint8_t from_peer, int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -2602,111 +2597,133 @@
struct mbuf *m_notify;
struct sctp_assoc_change *sac;
struct sctp_queued_to_read *control;
-
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ size_t notif_len, abort_len;
+ unsigned int i;
+
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
+ if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
+ notif_len = sizeof(struct sctp_assoc_change);
+ if (abort != NULL) {
+ abort_len = htons(abort->ch.chunk_length);
+ } else {
+ abort_len = 0;
+ }
+ if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
+ notif_len += SCTP_ASSOC_SUPPORTS_MAX;
+ } else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
+ notif_len += abort_len;
+ }
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
+ /* Retry with smaller value. */
+ notif_len = sizeof(struct sctp_assoc_change);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
+ goto set_error;
+ }
+ }
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ sac = mtod(m_notify, struct sctp_assoc_change *);
+ sac->sac_type = SCTP_ASSOC_CHANGE;
+ sac->sac_flags = 0;
+ sac->sac_length = sizeof(struct sctp_assoc_change);
+ sac->sac_state = state;
+ sac->sac_error = error;
+ /* XXX verify these stream counts */
+ sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
+ sac->sac_inbound_streams = stcb->asoc.streamincnt;
+ sac->sac_assoc_id = sctp_get_associd(stcb);
+ if (notif_len > sizeof(struct sctp_assoc_change)) {
+ if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
+ i = 0;
+ if (stcb->asoc.peer_supports_prsctp) {
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR;
+ }
+ if (stcb->asoc.peer_supports_auth) {
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH;
+ }
+ if (stcb->asoc.peer_supports_asconf) {
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF;
+ }
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF;
+ if (stcb->asoc.peer_supports_strreset) {
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG;
+ }
+ sac->sac_length += i;
+ } else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
+ memcpy(sac->sac_info, abort, abort_len);
+ sac->sac_length += abort_len;
+ }
+ }
+ SCTP_BUF_LEN(m_notify) = sac->sac_length;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, stcb->asoc.context, 0, 0, 0,
+ m_notify);
+ if (control != NULL) {
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD,
+ so_locked);
+ } else {
+ sctp_m_freem(m_notify);
+ }
+ }
/*
- * For TCP model AND UDP connected sockets we will send an error up
- * when an ABORT comes in.
+ * For 1-to-1 style sockets, we send up and error when an ABORT
+ * comes in.
*/
+set_error:
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- ((event == SCTP_COMM_LOST) || (event == SCTP_CANT_STR_ASSOC))) {
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
- SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
- stcb->sctp_socket->so_error = ECONNREFUSED;
+ ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
+ if (from_peer) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
+ stcb->sctp_socket->so_error = ECONNREFUSED;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ stcb->sctp_socket->so_error = ECONNRESET;
+ }
} else {
- SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
- stcb->sctp_socket->so_error = ECONNRESET;
- }
- /* Wake ANY sleepers */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- so = SCTP_INP_SO(stcb->sctp_ep);
- if (!so_locked) {
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- SCTP_SOCKET_LOCK(so, 1);
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
- SCTP_SOCKET_UNLOCK(so, 1);
- return;
- }
- }
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
+ stcb->sctp_socket->so_error = ECONNABORTED;
+ }
+ }
+ /* Wake ANY sleepers */
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
#endif
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
socantrcvmore(stcb->sctp_socket);
- sorwakeup(stcb->sctp_socket);
- sowwakeup(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- if (!so_locked) {
- SCTP_SOCKET_UNLOCK(so, 1);
- }
+ }
+ sorwakeup(stcb->sctp_socket);
+ sowwakeup(stcb->sctp_socket);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
#endif
- }
- if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
- /* event not enabled */
- return;
- }
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_change), 0, M_DONTWAIT, 1, MT_DATA);
- if (m_notify == NULL)
- /* no space left */
- return;
- SCTP_BUF_LEN(m_notify) = 0;
-
- sac = mtod(m_notify, struct sctp_assoc_change *);
- sac->sac_type = SCTP_ASSOC_CHANGE;
- sac->sac_flags = 0;
- sac->sac_length = sizeof(struct sctp_assoc_change);
- sac->sac_state = event;
- sac->sac_error = error;
- /* XXX verify these stream counts */
- sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
- sac->sac_inbound_streams = stcb->asoc.streamincnt;
- sac->sac_assoc_id = sctp_get_associd(stcb);
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_change);
- SCTP_BUF_NEXT(m_notify) = NULL;
- control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
- 0, 0, stcb->asoc.context, 0, 0, 0,
- m_notify);
- if (control == NULL) {
- /* no memory */
- sctp_m_freem(m_notify);
- return;
- }
- control->length = SCTP_BUF_LEN(m_notify);
- /* not that we need this */
- control->tail_mbuf = m_notify;
- control->spec_flags = M_NOTIFICATION;
- sctp_add_to_readq(stcb->sctp_ep, stcb,
- control,
- &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD,
- so_locked);
- if (event == SCTP_COMM_LOST) {
- /* Wake up any sleeper */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- so = SCTP_INP_SO(stcb->sctp_ep);
- if (!so_locked) {
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- SCTP_SOCKET_LOCK(so, 1);
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
- SCTP_SOCKET_UNLOCK(so, 1);
- return;
- }
- }
-#endif
- sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- if (!so_locked) {
- SCTP_SOCKET_UNLOCK(so, 1);
- }
-#endif
- }
}
static void
@@ -2789,7 +2806,7 @@
static void
-sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error,
+sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
struct sctp_tmit_chunk *chk, int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -2798,39 +2815,68 @@
{
struct mbuf *m_notify;
struct sctp_send_failed *ssf;
+ struct sctp_send_failed_event *ssfe;
struct sctp_queued_to_read *control;
int length;
if ((stcb == NULL) ||
- sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
+ sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) {
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
+ length = sizeof(struct sctp_send_failed_event);
+ } else {
+ length = sizeof(struct sctp_send_failed);
+ }
+ m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
- length = sizeof(struct sctp_send_failed) + chk->send_size;
+ length += chk->send_size;
length -= sizeof(struct sctp_data_chunk);
SCTP_BUF_LEN(m_notify) = 0;
- ssf = mtod(m_notify, struct sctp_send_failed *);
- ssf->ssf_type = SCTP_SEND_FAILED;
- if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
- ssf->ssf_flags = SCTP_DATA_UNSENT;
- else
- ssf->ssf_flags = SCTP_DATA_SENT;
- ssf->ssf_length = length;
- ssf->ssf_error = error;
- /* not exactly what the user sent in, but should be close :) */
- bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
- ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
- ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
- ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
- ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
- ssf->ssf_info.sinfo_context = chk->rec.data.context;
- ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
- ssf->ssf_assoc_id = sctp_get_associd(stcb);
-
+ if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
+ ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+ ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
+ if (sent) {
+ ssfe->ssfe_flags = SCTP_DATA_SENT;
+ } else {
+ ssfe->ssfe_flags = SCTP_DATA_UNSENT;
+ }
+ ssfe->ssfe_length = length;
+ ssfe->ssfe_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
+ ssfe->ssfe_info.snd_sid = chk->rec.data.stream_number;
+ ssfe->ssfe_info.snd_flags = chk->rec.data.rcv_flags;
+ ssfe->ssfe_info.snd_ppid = chk->rec.data.payloadtype;
+ ssfe->ssfe_info.snd_context = chk->rec.data.context;
+ ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
+ ssfe->ssfe_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
+ } else {
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (sent) {
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ } else {
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ }
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
+ ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
+ ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
+ ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
+ ssf->ssf_info.sinfo_context = chk->rec.data.context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+ }
if (chk->data) {
/*
* trim off the sctp chunk header(it should be there)
@@ -2842,7 +2888,6 @@
}
}
SCTP_BUF_NEXT(m_notify) = chk->data;
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
/* Steal off the mbuf */
chk->data = NULL;
/*
@@ -2882,43 +2927,69 @@
{
struct mbuf *m_notify;
struct sctp_send_failed *ssf;
+ struct sctp_send_failed_event *ssfe;
struct sctp_queued_to_read *control;
int length;
if ((stcb == NULL) ||
- sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
+ sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) {
/* event not enabled */
return;
}
- length = sizeof(struct sctp_send_failed) + sp->length;
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
- if (m_notify == NULL)
+ if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
+ length = sizeof(struct sctp_send_failed_event);
+ } else {
+ length = sizeof(struct sctp_send_failed);
+ }
+ m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
/* no space left */
return;
+ }
+ length += sp->length;
SCTP_BUF_LEN(m_notify) = 0;
- ssf = mtod(m_notify, struct sctp_send_failed *);
- ssf->ssf_type = SCTP_SEND_FAILED;
- if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
+ ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+ ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
+ ssfe->ssfe_flags = SCTP_DATA_UNSENT;
+ ssfe->ssfe_length = length;
+ ssfe->ssfe_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
+ ssfe->ssfe_info.snd_sid = sp->stream;
+ if (sp->some_taken) {
+ ssfe->ssfe_info.snd_flags = SCTP_DATA_LAST_FRAG;
+ } else {
+ ssfe->ssfe_info.snd_flags = SCTP_DATA_NOT_FRAG;
+ }
+ ssfe->ssfe_info.snd_ppid = sp->ppid;
+ ssfe->ssfe_info.snd_context = sp->context;
+ ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
+ ssfe->ssfe_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
+ } else {
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
ssf->ssf_flags = SCTP_DATA_UNSENT;
- else
- ssf->ssf_flags = SCTP_DATA_SENT;
- ssf->ssf_length = length;
- ssf->ssf_error = error;
- /* not exactly what the user sent in, but should be close :) */
- bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
- ssf->ssf_info.sinfo_stream = sp->stream;
- ssf->ssf_info.sinfo_ssn = sp->strseq;
- if (sp->some_taken) {
- ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG;
- } else {
- ssf->ssf_info.sinfo_flags = SCTP_DATA_NOT_FRAG;
- }
- ssf->ssf_info.sinfo_ppid = sp->ppid;
- ssf->ssf_info.sinfo_context = sp->context;
- ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
- ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = sp->stream;
+ ssf->ssf_info.sinfo_ssn = sp->strseq;
+ if (sp->some_taken) {
+ ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG;
+ } else {
+ ssf->ssf_info.sinfo_flags = SCTP_DATA_NOT_FRAG;
+ }
+ ssf->ssf_info.sinfo_ppid = sp->ppid;
+ ssf->ssf_info.sinfo_context = sp->context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+ }
SCTP_BUF_NEXT(m_notify) = sp->data;
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
/* Steal off the mbuf */
sp->data = NULL;
@@ -3063,7 +3134,7 @@
}
if (stcb->sctp_ep && stcb->sctp_socket) {
/* This should always be the case */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3080,7 +3151,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -3102,7 +3173,7 @@
if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
/* mark socket closed for read/write and wakeup! */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3117,7 +3188,7 @@
}
#endif
socantsendmore(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3214,7 +3285,8 @@
struct sctp_stream_change_event *stradd;
int len;
- if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) {
+ if ((stcb == NULL) ||
+ (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_CHANGEEVNT))) {
/* event not enabled */
return;
}
@@ -3275,7 +3347,8 @@
struct sctp_assoc_reset_event *strasoc;
int len;
- if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) {
+ if ((stcb == NULL) ||
+ (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ASSOC_RESETEVNT))) {
/* event not enabled */
return;
}
@@ -3333,7 +3406,8 @@
struct sctp_stream_reset_event *strreset;
int len;
- if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) {
+ if ((stcb == NULL) ||
+ (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT))) {
/* event not enabled */
return;
}
@@ -3386,6 +3460,63 @@
}
+static void
+sctp_notify_remote_error(struct sctp_tcb *stcb, uint16_t error, struct sctp_error_chunk *chunk)
+{
+ struct mbuf *m_notify;
+ struct sctp_remote_error *sre;
+ struct sctp_queued_to_read *control;
+ size_t notif_len, chunk_len;
+
+ if ((stcb == NULL) ||
+ sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPEERERR)) {
+ return;
+ }
+ if (chunk != NULL) {
+ chunk_len = htons(chunk->ch.chunk_length);
+ } else {
+ chunk_len = 0;
+ }
+ notif_len = sizeof(struct sctp_remote_error) + chunk_len;
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
+ /* Retry with smaller value. */
+ notif_len = sizeof(struct sctp_remote_error);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
+ return;
+ }
+ }
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ sre = mtod(m_notify, struct sctp_remote_error *);
+ sre->sre_type = SCTP_REMOTE_ERROR;
+ sre->sre_flags = 0;
+ sre->sre_length = sizeof(struct sctp_remote_error);
+ sre->sre_error = error;
+ sre->sre_assoc_id = sctp_get_associd(stcb);
+ if (notif_len > sizeof(struct sctp_remote_error)) {
+ memcpy(sre->sre_data, chunk, chunk_len);
+ sre->sre_length += chunk_len;
+ }
+ SCTP_BUF_LEN(m_notify) = sre->sre_length;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, stcb->asoc.context, 0, 0, 0,
+ m_notify);
+ if (control != NULL) {
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ } else {
+ sctp_m_freem(m_notify);
+ }
+}
+
+
void
sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
uint32_t error, void *data, int so_locked
@@ -3416,7 +3547,7 @@
switch (notification) {
case SCTP_NOTIFY_ASSOC_UP:
if (stcb->asoc.assoc_up_sent == 0) {
- sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, so_locked);
+ sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, 0, so_locked);
stcb->asoc.assoc_up_sent = 1;
}
if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
@@ -3428,7 +3559,7 @@
}
break;
case SCTP_NOTIFY_ASSOC_DOWN:
- sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, so_locked);
+ sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, 0, so_locked);
break;
case SCTP_NOTIFY_INTERFACE_DOWN:
{
@@ -3461,8 +3592,12 @@
sctp_notify_send_failed2(stcb, error,
(struct sctp_stream_queue_pending *)data, so_locked);
break;
- case SCTP_NOTIFY_DG_FAIL:
- sctp_notify_send_failed(stcb, error,
+ case SCTP_NOTIFY_SENT_DG_FAIL:
+ sctp_notify_send_failed(stcb, 1, error,
+ (struct sctp_tmit_chunk *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_UNSENT_DG_FAIL:
+ sctp_notify_send_failed(stcb, 0, error,
(struct sctp_tmit_chunk *)data, so_locked);
break;
case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION:
@@ -3474,42 +3609,50 @@
sctp_notify_partial_delivery_indication(stcb, error, val, so_locked);
break;
}
- case SCTP_NOTIFY_STRDATA_ERR:
- break;
- case SCTP_NOTIFY_ASSOC_ABORTED:
+ case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
- sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, so_locked);
+ sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
} else {
- sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, so_locked);
+ sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
}
break;
- case SCTP_NOTIFY_PEER_OPENED_STREAM:
- break;
- case SCTP_NOTIFY_STREAM_OPENED_OK:
+ case SCTP_NOTIFY_ASSOC_REM_ABORTED:
+ if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
+ } else {
+ sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
+ }
break;
case SCTP_NOTIFY_ASSOC_RESTART:
- sctp_notify_assoc_change(SCTP_RESTART, stcb, error, so_locked);
+ sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, 0, so_locked);
if (stcb->asoc.peer_supports_auth == 0) {
sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
NULL, so_locked);
}
break;
- case SCTP_NOTIFY_HB_RESP:
- break;
case SCTP_NOTIFY_STR_RESET_SEND:
- sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_INCOMING);
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_OUTGOING_SSN);
break;
case SCTP_NOTIFY_STR_RESET_RECV:
- sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_OUTGOING);
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_INCOMING);
break;
case SCTP_NOTIFY_STR_RESET_FAILED_OUT:
sctp_notify_stream_reset(stcb, error, ((uint16_t *) data),
- (SCTP_STREAM_RESET_OUTGOING | SCTP_STREAM_RESET_INCOMING));
+ (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_FAILED));
+ break;
+ case SCTP_NOTIFY_STR_RESET_DENIED_OUT:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data),
+ (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_DENIED));
break;
case SCTP_NOTIFY_STR_RESET_FAILED_IN:
sctp_notify_stream_reset(stcb, error, ((uint16_t *) data),
- (SCTP_STREAM_RESET_OUTGOING | SCTP_STREAM_RESET_INCOMING));
+ (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_FAILED));
+ break;
+ case SCTP_NOTIFY_STR_RESET_DENIED_IN:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data),
+ (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_DENIED));
break;
case SCTP_NOTIFY_ASCONF_ADD_IP:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
@@ -3523,15 +3666,11 @@
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
error);
break;
- case SCTP_NOTIFY_ASCONF_SUCCESS:
- break;
- case SCTP_NOTIFY_ASCONF_FAILED:
- break;
case SCTP_NOTIFY_PEER_SHUTDOWN:
sctp_notify_shutdown_event(stcb);
break;
case SCTP_NOTIFY_AUTH_NEW_KEY:
- sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, error,
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY, error,
(uint16_t) (uintptr_t) data,
so_locked);
break;
@@ -3548,6 +3687,9 @@
case SCTP_NOTIFY_SENDER_DRY:
sctp_notify_sender_dry_event(stcb, so_locked);
break;
+ case SCTP_NOTIFY_REMOTE_ERROR:
+ sctp_notify_remote_error(stcb, error, data);
+ break;
default:
SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
__FUNCTION__, notification, notification);
@@ -3556,7 +3698,7 @@
}
void
-sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked
+sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int holds_lock, int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -3591,8 +3733,8 @@
asoc->sent_queue_cnt--;
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_SENT, chk, so_locked);
+ sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb,
+ error, chk, so_locked);
if (chk->data) {
sctp_m_freem(chk->data);
chk->data = NULL;
@@ -3607,8 +3749,8 @@
asoc->send_queue_cnt--;
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_UNSENT, chk, so_locked);
+ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
+ error, chk, so_locked);
if (chk->data) {
sctp_m_freem(chk->data);
chk->data = NULL;
@@ -3628,10 +3770,12 @@
sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
- SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, so_locked);
+ error, (void *)sp, so_locked);
if (sp->data) {
sctp_m_freem(sp->data);
sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ sp->length = 0;
}
}
if (sp->net) {
@@ -3650,7 +3794,8 @@
}
void
-sctp_abort_notification(struct sctp_tcb *stcb, int error, int so_locked
+sctp_abort_notification(struct sctp_tcb *stcb, uint8_t from_peer, uint16_t error,
+ struct sctp_abort_chunk *abort, int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -3670,18 +3815,25 @@
return;
}
/* Tell them we lost the asoc */
- sctp_report_all_outbound(stcb, 1, so_locked);
- sctp_ulp_notify(SCTP_NOTIFY_ASSOC_ABORTED, stcb, error, NULL, so_locked);
+ sctp_report_all_outbound(stcb, error, 1, so_locked);
+ if (from_peer) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_REM_ABORTED, stcb, error, abort, so_locked);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_LOC_ABORTED, stcb, error, abort, so_locked);
+ }
}
void
sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *m, int iphlen, struct sctphdr *sh, struct mbuf *op_err,
+ struct mbuf *m, int iphlen,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct mbuf *op_err,
+ uint8_t use_mflowid, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
uint32_t vtag;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3690,15 +3842,17 @@
if (stcb != NULL) {
/* We have a TCB to abort, send notification too */
vtag = stcb->asoc.peer_vtag;
- sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
/* get the assoc vrf id and table id */
vrf_id = stcb->asoc.vrf_id;
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
}
- sctp_send_abort(m, iphlen, sh, vtag, op_err, vrf_id, port);
+ sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err,
+ use_mflowid, mflowid,
+ vrf_id, port);
if (stcb != NULL) {
/* Ok, now lets free it */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3712,7 +3866,7 @@
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3784,19 +3938,19 @@
void
sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- int error, struct mbuf *op_err,
+ struct mbuf *op_err,
int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
#endif
if (stcb == NULL) {
@@ -3812,12 +3966,10 @@
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
}
/* notify the ulp */
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0)
- sctp_abort_notification(stcb, error, so_locked);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ sctp_abort_notification(stcb, 0, 0, NULL, so_locked);
+ }
/* notify the peer */
-#if defined(SCTP_PANIC_ON_ABORT)
- panic("aborting an association");
-#endif
sctp_send_abort_tcb(stcb, op_err, so_locked);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
@@ -3828,7 +3980,7 @@
#ifdef SCTP_ASOCLOG_OF_TSNS
sctp_print_out_track_log(stcb);
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3838,7 +3990,7 @@
}
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -3846,8 +3998,11 @@
}
void
-sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
- struct sctp_inpcb *inp, struct mbuf *op_err, uint32_t vrf_id, uint16_t port)
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_inpcb *inp,
+ uint8_t use_mflowid, uint32_t mflowid,
+ uint32_t vrf_id, uint16_t port)
{
struct sctp_chunkhdr *ch, chunk_buf;
unsigned int chk_length;
@@ -3890,7 +4045,9 @@
*/
return;
case SCTP_SHUTDOWN_ACK:
- sctp_send_shutdown_complete2(m, sh, vrf_id, port);
+ sctp_send_shutdown_complete2(src, dst, sh,
+ use_mflowid, mflowid,
+ vrf_id, port);
return;
default:
break;
@@ -3902,7 +4059,9 @@
if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
(contains_init_chunk == 0))) {
- sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port);
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, NULL,
+ use_mflowid, mflowid,
+ vrf_id, port);
}
}
@@ -4093,62 +4252,6 @@
}
void
-sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
-{
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- struct sockaddr_in lsa, fsa;
-
- bzero(&lsa, sizeof(lsa));
- lsa.sin_len = sizeof(lsa);
- lsa.sin_family = AF_INET;
- lsa.sin_addr = iph->ip_src;
- lsa.sin_port = sh->src_port;
- bzero(&fsa, sizeof(fsa));
- fsa.sin_len = sizeof(fsa);
- fsa.sin_family = AF_INET;
- fsa.sin_addr = iph->ip_dst;
- fsa.sin_port = sh->dest_port;
- SCTP_PRINTF("src: ");
- sctp_print_address((struct sockaddr *)&lsa);
- SCTP_PRINTF("dest: ");
- sctp_print_address((struct sockaddr *)&fsa);
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- struct ip6_hdr *ip6;
- struct sockaddr_in6 lsa6, fsa6;
-
- ip6 = (struct ip6_hdr *)iph;
- bzero(&lsa6, sizeof(lsa6));
- lsa6.sin6_len = sizeof(lsa6);
- lsa6.sin6_family = AF_INET6;
- lsa6.sin6_addr = ip6->ip6_src;
- lsa6.sin6_port = sh->src_port;
- bzero(&fsa6, sizeof(fsa6));
- fsa6.sin6_len = sizeof(fsa6);
- fsa6.sin6_family = AF_INET6;
- fsa6.sin6_addr = ip6->ip6_dst;
- fsa6.sin6_port = sh->dest_port;
- SCTP_PRINTF("src: ");
- sctp_print_address((struct sockaddr *)&lsa6);
- SCTP_PRINTF("dest: ");
- sctp_print_address((struct sockaddr *)&fsa6);
- break;
- }
-#endif
- default:
- /* TSNH */
- break;
- }
-}
-
-void
sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
struct sctp_inpcb *new_inp,
struct sctp_tcb *stcb,
@@ -4320,7 +4423,7 @@
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -4341,7 +4444,7 @@
}
#endif
sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -4474,7 +4577,7 @@
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -4493,7 +4596,7 @@
}
#endif
sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -4566,7 +4669,7 @@
int
sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
- int reason, int so_locked
+ uint8_t sent, int so_locked
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -4593,7 +4696,11 @@
sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
stcb->asoc.peers_rwnd += tp1->send_size;
stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked);
+ if (sent) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked);
+ }
if (tp1->data) {
sctp_m_freem(tp1->data);
tp1->data = NULL;
@@ -4640,7 +4747,11 @@
chk = tp1;
ret_sz += tp1->book_size;
sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
- sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked);
+ if (sent) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked);
+ }
if (tp1->data) {
sctp_m_freem(tp1->data);
tp1->data = NULL;
@@ -4724,7 +4835,7 @@
/*
* Pull any data to free up the SB
* and allow sender to "add more"
- * whilc we will throw away :-)
+ * while we will throw away :-)
*/
sctp_free_spbufspace(stcb, &stcb->asoc,
sp);
@@ -4732,9 +4843,9 @@
do_wakeup_routine = 1;
sp->some_taken = 1;
sctp_m_freem(sp->data);
- sp->length = 0;
sp->data = NULL;
sp->tail_mbuf = NULL;
+ sp->length = 0;
}
break;
}
@@ -4742,7 +4853,7 @@
SCTP_TCB_SEND_UNLOCK(stcb);
}
if (do_wakeup_routine) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -4760,7 +4871,7 @@
}
#endif
sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -5115,7 +5226,7 @@
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
goto out;
}
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && (so->so_rcv.sb_cc == 0)) {
if (so->so_error) {
error = so->so_error;
if ((in_flags & MSG_PEEK) == 0)
@@ -5123,7 +5234,6 @@
goto out;
} else {
if (so->so_rcv.sb_cc == 0) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
/* indicate EOF */
error = 0;
goto out;
@@ -5392,7 +5502,7 @@
#ifdef INVARIANTS
panic("refcnt already incremented");
#else
- printf("refcnt already incremented?\n");
+ SCTP_PRINTF("refcnt already incremented?\n");
#endif
} else {
atomic_add_int(&stcb->asoc.refcnt, 1);
@@ -5529,7 +5639,7 @@
memcpy(from, &sin6, sizeof(struct sockaddr_in6));
}
#endif
-#if defined(INET6)
+#ifdef INET6
{
struct sockaddr_in6 lsa6, *from6;
@@ -6426,7 +6536,7 @@
return;
}
addr_touse = sa;
-#if defined(INET6)
+#ifdef INET6
if (sa->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
@@ -6675,83 +6785,61 @@
struct ip *iph;
struct mbuf *sp, *last;
struct udphdr *uhdr;
- uint16_t port = 0;
- int header_size = sizeof(struct udphdr) + sizeof(struct sctphdr);
-
+ uint16_t port;
+
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* Can't handle one that is not a pkt hdr */
+ goto out;
+ }
+ /* Pull the src port */
+ iph = mtod(m, struct ip *);
+ uhdr = (struct udphdr *)((caddr_t)iph + off);
+ port = uhdr->uh_sport;
/*
* Split out the mbuf chain. Leave the IP header in m, place the
* rest in the sp.
*/
- if ((m->m_flags & M_PKTHDR) == 0) {
- /* Can't handle one that is not a pkt hdr */
- goto out;
- }
- /* pull the src port */
- iph = mtod(m, struct ip *);
- uhdr = (struct udphdr *)((caddr_t)iph + off);
-
- port = uhdr->uh_sport;
sp = m_split(m, off, M_DONTWAIT);
if (sp == NULL) {
/* Gak, drop packet, we can't do a split */
goto out;
}
- if (sp->m_pkthdr.len < header_size) {
- /* Gak, packet can't have an SCTP header in it - to small */
+ if (sp->m_pkthdr.len < sizeof(struct udphdr) + sizeof(struct sctphdr)) {
+ /* Gak, packet can't have an SCTP header in it - too small */
m_freem(sp);
goto out;
}
- /* ok now pull up the UDP header and SCTP header together */
- sp = m_pullup(sp, header_size);
+ /* Now pull up the UDP header and SCTP header together */
+ sp = m_pullup(sp, sizeof(struct udphdr) + sizeof(struct sctphdr));
if (sp == NULL) {
/* Gak pullup failed */
goto out;
}
- /* trim out the UDP header */
+ /* Trim out the UDP header */
m_adj(sp, sizeof(struct udphdr));
/* Now reconstruct the mbuf chain */
- /* 1) find last one */
- last = m;
- while (last->m_next != NULL) {
- last = last->m_next;
- }
+ for (last = m; last->m_next; last = last->m_next);
last->m_next = sp;
m->m_pkthdr.len += sp->m_pkthdr.len;
- last = m;
- while (last != NULL) {
- last = last->m_next;
- }
- /* Now its ready for sctp_input or sctp6_input */
iph = mtod(m, struct ip *);
switch (iph->ip_v) {
#ifdef INET
case IPVERSION:
- {
- uint16_t len;
-
- /* its IPv4 */
- len = SCTP_GET_IPV4_LENGTH(iph);
- len -= sizeof(struct udphdr);
- SCTP_GET_IPV4_LENGTH(iph) = len;
- sctp_input_with_port(m, off, port);
- break;
- }
+ iph->ip_len -= sizeof(struct udphdr);
+ sctp_input_with_port(m, off, port);
+ break;
#endif
#ifdef INET6
case IPV6_VERSION >> 4:
- {
- /* its IPv6 - NOT supported */
- goto out;
- break;
-
- }
+ /* Not yet supported. */
+ goto out;
+ break;
+
#endif
default:
- {
- m_freem(m);
- break;
- }
+ goto out;
+ break;
}
return;
out:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/sctputil.h
--- a/head/sys/netinet/sctputil.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/sctputil.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
+ * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
+ * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,14 +30,11 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/sctputil.h 237715 2012-06-28 16:01:08Z tuexen $");
-/* $KAME: sctputil.h,v 1.15 2005/03/06 16:04:19 itojun Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/sctputil.h 233660 2012-03-29 13:36:53Z rrs $");
-#ifndef __sctputil_h__
-#define __sctputil_h__
-
+#ifndef _NETINET_SCTP_UTIL_H_
+#define _NETINET_SCTP_UTIL_H_
#if defined(_KERNEL) || defined(__Userspace__)
@@ -170,7 +167,7 @@
void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
void
-sctp_report_all_outbound(struct sctp_tcb *, int, int
+sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -179,7 +176,8 @@
int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
void
-sctp_abort_notification(struct sctp_tcb *, int, int
+sctp_abort_notification(struct sctp_tcb *, uint8_t, uint16_t,
+ struct sctp_abort_chunk *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -187,13 +185,16 @@
/* We abort responding to an IP packet for some reason */
void
-sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *,
- struct mbuf *, int, struct sctphdr *, struct mbuf *, uint32_t, uint16_t);
+sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
+ int, struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct mbuf *,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
/* We choose to abort via user input */
void
-sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int,
+sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *,
struct mbuf *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -201,8 +202,11 @@
);
void
-sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
- struct sctp_inpcb *, struct mbuf *, uint32_t, uint16_t);
+sctp_handle_ootb(struct mbuf *, int, int,
+ struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct sctp_inpcb *,
+ uint8_t, uint32_t,
+ uint32_t, uint16_t);
int
sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
@@ -239,11 +243,10 @@
int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
void sctp_print_address(struct sockaddr *);
-void sctp_print_address_pkt(struct ip *, struct sctphdr *);
int
sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
- int, int
+ uint8_t, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
@@ -383,7 +386,5 @@
void sctp_audit_log(uint8_t, uint8_t);
#endif
-
-
#endif /* _KERNEL */
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_hostcache.c
--- a/head/sys/netinet/tcp_hostcache.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_hostcache.c Wed Jul 25 16:40:53 2012 +0300
@@ -63,7 +63,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_hostcache.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_hostcache.c 238083 2012-07-03 18:59:13Z trociny $");
#include "opt_inet6.h"
@@ -624,7 +624,7 @@
msec(hc_entry->rmx_rtt *
(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
msec(hc_entry->rmx_rttvar *
- (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
+ (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE))),
hc_entry->rmx_bandwidth * 8,
hc_entry->rmx_cwnd,
hc_entry->rmx_sendpipe,
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_input.c
--- a/head/sys/netinet/tcp_input.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_input.c Wed Jul 25 16:40:53 2012 +0300
@@ -48,7 +48,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_input.c 234342 2012-04-16 13:49:03Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_input.c 238699 2012-07-22 17:31:36Z rwatson $");
#include "opt_ipfw.h" /* for ipfw_fwd */
#include "opt_inet.h"
@@ -105,6 +105,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -512,6 +515,8 @@
(caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
return IPPROTO_DONE;
}
+ if (ia6)
+ ifa_free(&ia6->ia_ifa);
tcp_input(m, *offp);
return IPPROTO_DONE;
@@ -577,13 +582,31 @@
#ifdef INET6
if (isipv6) {
/* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
+
+ if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
+ m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
+ if (m == NULL) {
+ TCPSTAT_INC(tcps_rcvshort);
+ return;
+ }
+ }
+
ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)((caddr_t)ip6 + off0);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
- if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+ th->th_sum = m->m_pkthdr.csum_data;
+ else
+ th->th_sum = in6_cksum_pseudo(ip6, tlen,
+ IPPROTO_TCP, m->m_pkthdr.csum_data);
+ th->th_sum ^= 0xffff;
+ } else
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
+ if (th->th_sum) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
- th = (struct tcphdr *)((caddr_t)ip6 + off0);
/*
* Be proactive about unspecified IPv6 address in source.
@@ -886,7 +909,7 @@
/*
* A previous connection in TIMEWAIT state is supposed to catch stray
* or duplicate segments arriving late. If this segment was a
- * legitimate new connection attempt the old INPCB gets removed and
+ * legitimate new connection attempt, the old INPCB gets removed and
* we can try again to find a listening socket.
*
* At this point, due to earlier optimism, we may hold only an inpcb
@@ -938,6 +961,14 @@
goto dropwithreset;
}
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_input(tp, m);
+ m = NULL; /* consumed by the TOE driver */
+ goto dropunlock;
+ }
+#endif
+
/*
* We've identified a valid inpcb, but it could be that we need an
* inpcbinfo write lock but don't hold it. In this case, attempt to
@@ -1222,7 +1253,8 @@
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
- ifa_free(&ia6->ia_ifa);
+ if (ia6)
+ ifa_free(&ia6->ia_ifa);
}
#endif /* INET6 */
/*
@@ -1299,7 +1331,7 @@
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, TO_SYN);
- syncache_add(&inc, &to, th, inp, &so, m);
+ syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
/*
* Entry added to syncache and mbuf consumed.
* Everything already unlocked by syncache_add().
@@ -1406,15 +1438,8 @@
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
- * allow either a read lock or a write lock, as we may have acquired
- * a write lock due to a race.
- *
- * Require a global write lock for SYN/FIN/RST segments or
- * non-established connections; otherwise accept either a read or
- * write lock, as we may have conservatively acquired a write lock in
- * certain cases in tcp_input() (is this still true?). Currently we
- * will never enter with no lock, so we try to drop it quickly in the
- * common pure ack/pure data cases.
+ * allow the tcbinfo to be in either alocked or unlocked, as the
+ * caller may have unnecessarily acquired a write lock due to a race.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
@@ -3542,7 +3567,6 @@
if (inc->inc_flags & INC_ISIPV6) {
mss = V_tcp_v6mssdflt;
maxmtu = tcp_maxmtu6(inc, NULL);
- thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
}
#endif
@@ -3553,10 +3577,13 @@
{
mss = V_tcp_mssdflt;
maxmtu = tcp_maxmtu(inc, NULL);
- thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
#endif
+#if defined(INET6) || defined(INET)
+ thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+#endif
+
if (maxmtu && thcmtu)
mss = min(maxmtu, thcmtu) - min_protoh;
else if (maxmtu || thcmtu)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_lro.c
--- a/head/sys/netinet/tcp_lro.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_lro.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,397 +1,615 @@
-/******************************************************************************
+/*-
+ * Copyright (c) 2007, Myricom Inc.
+ * Copyright (c) 2008, Intel Corporation.
+ * Copyright (c) 2012 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Bjoern Zeeb
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
-Copyright (c) 2007, Myricom Inc.
-Copyright (c) 2008, Intel Corporation.
-All rights reserved.
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_lro.c 236394 2012-06-01 11:42:50Z bz $");
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Myricom Inc, nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- 3. Neither the name of the Intel Corporation, nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD: head/sys/netinet/tcp_lro.c 223797 2011-07-05 18:43:54Z cperciva $
-***************************************************************************/
+#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/endian.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/ethernet.h>
-#include <net/if_media.h>
+#include <net/vnet.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
+#include <netinet/ip6.h>
#include <netinet/ip.h>
+#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
-#include <machine/bus.h>
+#include <netinet6/ip6_var.h>
+
#include <machine/in_cksum.h>
+#ifndef LRO_ENTRIES
+#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */
+#endif
-static uint16_t do_csum_data(uint16_t *raw, int len)
+#define TCP_LRO_UPDATE_CSUM 1
+#ifndef TCP_LRO_UPDATE_CSUM
+#define TCP_LRO_INVALID_CSUM 0x0000
+#endif
+
+int
+tcp_lro_init(struct lro_ctrl *lc)
{
- uint32_t csum;
- csum = 0;
- while (len > 0) {
- csum += *raw;
- raw++;
- csum += *raw;
- raw++;
- len -= 4;
- }
- csum = (csum >> 16) + (csum & 0xffff);
- csum = (csum >> 16) + (csum & 0xffff);
- return (uint16_t)csum;
-}
+ struct lro_entry *le;
+ int error, i;
-/*
- * Allocate and init the LRO data structures
- */
-int
-tcp_lro_init(struct lro_ctrl *cntl)
-{
- struct lro_entry *lro;
- int i, error = 0;
+ lc->lro_bad_csum = 0;
+ lc->lro_queued = 0;
+ lc->lro_flushed = 0;
+ lc->lro_cnt = 0;
+ SLIST_INIT(&lc->lro_free);
+ SLIST_INIT(&lc->lro_active);
- SLIST_INIT(&cntl->lro_free);
- SLIST_INIT(&cntl->lro_active);
-
- cntl->lro_bad_csum = 0;
- cntl->lro_queued = 0;
- cntl->lro_flushed = 0;
-
+ error = 0;
for (i = 0; i < LRO_ENTRIES; i++) {
- lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (lro == NULL) {
+ le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (le == NULL) {
if (i == 0)
error = ENOMEM;
break;
}
- cntl->lro_cnt = i;
- SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+ lc->lro_cnt = i + 1;
+ SLIST_INSERT_HEAD(&lc->lro_free, le, next);
}
return (error);
}
void
-tcp_lro_free(struct lro_ctrl *cntl)
+tcp_lro_free(struct lro_ctrl *lc)
{
- struct lro_entry *entry;
+ struct lro_entry *le;
- while (!SLIST_EMPTY(&cntl->lro_free)) {
- entry = SLIST_FIRST(&cntl->lro_free);
- SLIST_REMOVE_HEAD(&cntl->lro_free, next);
- free(entry, M_DEVBUF);
+ while (!SLIST_EMPTY(&lc->lro_free)) {
+ le = SLIST_FIRST(&lc->lro_free);
+ SLIST_REMOVE_HEAD(&lc->lro_free, next);
+ free(le, M_DEVBUF);
}
}
-void
-tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
+#ifdef TCP_LRO_UPDATE_CSUM
+static uint16_t
+tcp_lro_csum_th(struct tcphdr *th)
{
- struct ifnet *ifp;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- uint32_t tcplen, tcp_csum;
+ uint32_t ch;
+ uint16_t *p, l;
+ ch = th->th_sum = 0x0000;
+ l = th->th_off;
+ p = (uint16_t *)th;
+ while (l > 0) {
+ ch += *p;
+ p++;
+ ch += *p;
+ p++;
+ l--;
+ }
+ while (ch > 0xffff)
+ ch = (ch >> 16) + (ch & 0xffff);
- if (lro->append_cnt) {
- /* incorporate the new len into the ip header and
- * re-calculate the checksum */
- ip = lro->ip;
- ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
- ip->ip_sum = 0;
- ip->ip_sum = 0xffff ^
- do_csum_data((uint16_t*)ip,
- sizeof (*ip));
-
- lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- lro->m_head->m_pkthdr.csum_data = 0xffff;
- lro->m_head->m_pkthdr.len = lro->len;
-
- /* incorporate the latest ack into the tcp header */
- tcp = (struct tcphdr *) (ip + 1);
- tcp->th_ack = lro->ack_seq;
- tcp->th_win = lro->window;
- /* incorporate latest timestamp into the tcp header */
- if (lro->timestamp) {
- ts_ptr = (uint32_t *)(tcp + 1);
- ts_ptr[1] = htonl(lro->tsval);
- ts_ptr[2] = lro->tsecr;
- }
- /*
- * update checksum in tcp header by re-calculating the
- * tcp pseudoheader checksum, and adding it to the checksum
- * of the tcp payload data
- */
- tcp->th_sum = 0;
- tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
- tcp_csum = lro->data_csum;
- tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tcplen + IPPROTO_TCP));
- tcp_csum += do_csum_data((uint16_t*)tcp,
- tcp->th_off << 2);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
- tcp->th_sum = 0xffff ^ tcp_csum;
- }
- ifp = cntl->ifp;
- (*ifp->if_input)(cntl->ifp, lro->m_head);
- cntl->lro_queued += lro->append_cnt + 1;
- cntl->lro_flushed++;
- lro->m_head = NULL;
- lro->timestamp = 0;
- lro->append_cnt = 0;
- SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+ return (ch & 0xffff);
}
-int
-tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
+static uint16_t
+tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
+ uint16_t tcp_data_len, uint16_t csum)
{
- struct ether_header *eh;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- struct mbuf *m_nxt, *m_tail;
- struct lro_entry *lro;
- int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
- int opt_bytes, trim, csum_flags;
- uint32_t seq, tmp_csum, device_mtu;
+ uint32_t c;
+ uint16_t cs;
+ c = csum;
- eh = mtod(m_head, struct ether_header *);
- if (eh->ether_type != htons(ETHERTYPE_IP))
- return 1;
- ip = (struct ip *) (eh + 1);
- if (ip->ip_p != IPPROTO_TCP)
- return 1;
-
- /* ensure there are no options */
- if ((ip->ip_hl << 2) != sizeof (*ip))
- return -1;
+ /* Remove length from checksum. */
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6;
- /* .. and the packet is not fragmented */
- if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
- return -1;
+ ip6 = (struct ip6_hdr *)l3hdr;
+ if (le->append_cnt == 0)
+ cs = ip6->ip6_plen;
+ else {
+ uint32_t cx;
- /* verify that the IP header checksum is correct */
- csum_flags = m_head->m_pkthdr.csum_flags;
+ cx = ntohs(ip6->ip6_plen);
+ cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
+ }
+ break;
+ }
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip4;
+
+ ip4 = (struct ip *)l3hdr;
+ if (le->append_cnt == 0)
+ cs = ip4->ip_len;
+ else {
+ cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
+ IPPROTO_TCP);
+ cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
+ htons(cs));
+ }
+ break;
+ }
+#endif
+ default:
+ cs = 0; /* Keep compiler happy. */
+ }
+
+ cs = ~cs;
+ c += cs;
+
+ /* Remove TCP header csum. */
+ cs = ~tcp_lro_csum_th(th);
+ c += cs;
+ while (c > 0xffff)
+ c = (c >> 16) + (c & 0xffff);
+
+ return (c & 0xffff);
+}
+#endif
+
+void
+tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
+{
+
+ if (le->append_cnt > 0) {
+ struct tcphdr *th;
+ uint16_t p_len;
+
+ p_len = htons(le->p_len);
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6;
+
+ ip6 = le->le_ip6;
+ ip6->ip6_plen = p_len;
+ th = (struct tcphdr *)(ip6 + 1);
+ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
+ CSUM_PSEUDO_HDR;
+ le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
+ break;
+ }
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip4;
+#ifdef TCP_LRO_UPDATE_CSUM
+ uint32_t cl;
+ uint16_t c;
+#endif
+
+ ip4 = le->le_ip4;
+#ifdef TCP_LRO_UPDATE_CSUM
+ /* Fix IP header checksum for new length. */
+ c = ~ip4->ip_sum;
+ cl = c;
+ c = ~ip4->ip_len;
+ cl += c + p_len;
+ while (cl > 0xffff)
+ cl = (cl >> 16) + (cl & 0xffff);
+ c = cl;
+ ip4->ip_sum = ~c;
+#else
+ ip4->ip_sum = TCP_LRO_INVALID_CSUM;
+#endif
+ ip4->ip_len = p_len;
+ th = (struct tcphdr *)(ip4 + 1);
+ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
+ CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
+ le->p_len += ETHER_HDR_LEN;
+ break;
+ }
+#endif
+ default:
+ th = NULL; /* Keep compiler happy. */
+ }
+ le->m_head->m_pkthdr.csum_data = 0xffff;
+ le->m_head->m_pkthdr.len = le->p_len;
+
+ /* Incorporate the latest ACK into the TCP header. */
+ th->th_ack = le->ack_seq;
+ th->th_win = le->window;
+ /* Incorporate latest timestamp into the TCP header. */
+ if (le->timestamp != 0) {
+ uint32_t *ts_ptr;
+
+ ts_ptr = (uint32_t *)(th + 1);
+ ts_ptr[1] = htonl(le->tsval);
+ ts_ptr[2] = le->tsecr;
+ }
+#ifdef TCP_LRO_UPDATE_CSUM
+ /* Update the TCP header checksum. */
+ le->ulp_csum += p_len;
+ le->ulp_csum += tcp_lro_csum_th(th);
+ while (le->ulp_csum > 0xffff)
+ le->ulp_csum = (le->ulp_csum >> 16) +
+ (le->ulp_csum & 0xffff);
+ th->th_sum = (le->ulp_csum & 0xffff);
+ th->th_sum = ~th->th_sum;
+#else
+ th->th_sum = TCP_LRO_INVALID_CSUM;
+#endif
+ }
+
+ (*lc->ifp->if_input)(lc->ifp, le->m_head);
+ lc->lro_queued += le->append_cnt + 1;
+ lc->lro_flushed++;
+ bzero(le, sizeof(*le));
+ SLIST_INSERT_HEAD(&lc->lro_free, le, next);
+}
+
+#ifdef INET6
+static int
+tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
+ struct tcphdr **th)
+{
+
+ /* XXX-BZ we should check the flow-label. */
+
+ /* XXX-BZ We do not yet support ext. hdrs. */
+ if (ip6->ip6_nxt != IPPROTO_TCP)
+ return (TCP_LRO_NOT_SUPPORTED);
+
+ /* Find the TCP header. */
+ *th = (struct tcphdr *)(ip6 + 1);
+
+ return (0);
+}
+#endif
+
+#ifdef INET
+static int
+tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
+ struct tcphdr **th)
+{
+ int csum_flags;
+ uint16_t csum;
+
+ if (ip4->ip_p != IPPROTO_TCP)
+ return (TCP_LRO_NOT_SUPPORTED);
+
+ /* Ensure there are no options. */
+ if ((ip4->ip_hl << 2) != sizeof (*ip4))
+ return (TCP_LRO_CANNOT);
+
+ /* .. and the packet is not fragmented. */
+ if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
+ return (TCP_LRO_CANNOT);
+
+ /* Legacy IP has a header checksum that needs to be correct. */
+ csum_flags = m->m_pkthdr.csum_flags;
if (csum_flags & CSUM_IP_CHECKED) {
if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
- cntl->lro_bad_csum++;
- return -1;
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
}
} else {
- tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
- if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
- cntl->lro_bad_csum++;
- return -1;
- }
- }
-
- /* find the TCP header */
- tcp = (struct tcphdr *) (ip + 1);
-
- /* Get the TCP checksum if we dont have it */
- if (!csum)
- csum = tcp->th_sum;
-
- /* ensure no bits set besides ack or psh */
- if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return -1;
-
- /* check for timestamps. Since the only option we handle are
- timestamps, we only have to handle the simple case of
- aligned timestamps */
-
- opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
- tcp_hdr_len = sizeof (*tcp) + opt_bytes;
- ts_ptr = (uint32_t *)(tcp + 1);
- if (opt_bytes != 0) {
- if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
- (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
- return -1;
- }
-
- ip_len = ntohs(ip->ip_len);
- tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
-
-
- /*
- * If frame is padded beyond the end of the IP packet,
- * then we must trim the extra bytes off the end.
- */
- tot_len = m_head->m_pkthdr.len;
- trim = tot_len - (ip_len + ETHER_HDR_LEN);
- if (trim != 0) {
- if (trim < 0) {
- /* truncated packet */
- return -1;
- }
- m_adj(m_head, -trim);
- tot_len = m_head->m_pkthdr.len;
- }
-
- m_nxt = m_head;
- m_tail = NULL; /* -Wuninitialized */
- while (m_nxt != NULL) {
- m_tail = m_nxt;
- m_nxt = m_tail->m_next;
- }
-
- hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
- seq = ntohl(tcp->th_seq);
-
- SLIST_FOREACH(lro, &cntl->lro_active, next) {
- if (lro->source_port == tcp->th_sport &&
- lro->dest_port == tcp->th_dport &&
- lro->source_ip == ip->ip_src.s_addr &&
- lro->dest_ip == ip->ip_dst.s_addr) {
- /* Flush now if appending will result in overflow. */
- if (lro->len > (65535 - tcp_data_len)) {
- SLIST_REMOVE(&cntl->lro_active, lro,
- lro_entry, next);
- tcp_lro_flush(cntl, lro);
- break;
- }
-
- /* Try to append it */
-
- if (__predict_false(seq != lro->next_seq ||
- (tcp_data_len == 0 &&
- lro->ack_seq == tcp->th_ack))) {
- /* out of order packet or dup ack */
- SLIST_REMOVE(&cntl->lro_active, lro,
- lro_entry, next);
- tcp_lro_flush(cntl, lro);
- return -1;
- }
-
- if (opt_bytes) {
- uint32_t tsval = ntohl(*(ts_ptr + 1));
- /* make sure timestamp values are increasing */
- if (__predict_false(lro->tsval > tsval ||
- *(ts_ptr + 2) == 0)) {
- return -1;
- }
- lro->tsval = tsval;
- lro->tsecr = *(ts_ptr + 2);
- }
-
- lro->next_seq += tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
- lro->append_cnt++;
- if (tcp_data_len == 0) {
- m_freem(m_head);
- return 0;
- }
- /* subtract off the checksum of the tcp header
- * from the hardware checksum, and add it to the
- * stored tcp data checksum. Byteswap the checksum
- * if the total length so far is odd
- */
- tmp_csum = do_csum_data((uint16_t*)tcp,
- tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- if (lro->len & 0x1) {
- /* Odd number of bytes so far, flip bytes */
- csum = ((csum << 8) | (csum >> 8)) & 0xffff;
- }
- csum = csum + lro->data_csum;
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->len += tcp_data_len;
-
- /* adjust mbuf so that m->m_data points to
- the first byte of the payload */
- m_adj(m_head, hlen);
- /* append mbuf chain */
- lro->m_tail->m_next = m_head;
- /* advance the last pointer */
- lro->m_tail = m_tail;
- /* flush packet if required */
- device_mtu = cntl->ifp->if_mtu;
- if (lro->len > (65535 - device_mtu)) {
- SLIST_REMOVE(&cntl->lro_active, lro,
- lro_entry, next);
- tcp_lro_flush(cntl, lro);
- }
- return 0;
+ csum = in_cksum_hdr(ip4);
+ if (__predict_false((csum ^ 0xffff) != 0)) {
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
}
}
- if (SLIST_EMPTY(&cntl->lro_free))
- return -1;
+	/* Find the TCP header (we ensured there are no IP options). */
+ *th = (struct tcphdr *)(ip4 + 1);
- /* start a new chain */
- lro = SLIST_FIRST(&cntl->lro_free);
- SLIST_REMOVE_HEAD(&cntl->lro_free, next);
- SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
- lro->source_port = tcp->th_sport;
- lro->dest_port = tcp->th_dport;
- lro->source_ip = ip->ip_src.s_addr;
- lro->dest_ip = ip->ip_dst.s_addr;
- lro->next_seq = seq + tcp_data_len;
- lro->mss = tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
+ return (0);
+}
+#endif
- /* save the checksum of just the TCP payload by
- * subtracting off the checksum of the TCP header from
- * the entire hardware checksum
- * Since IP header checksum is correct, checksum over
- * the IP header is -0. Substracting -0 is unnecessary.
+int
+tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+{
+ struct lro_entry *le;
+ struct ether_header *eh;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
+#endif
+#ifdef INET
+ struct ip *ip4 = NULL; /* Keep compiler happy. */
+#endif
+ struct tcphdr *th;
+ void *l3hdr = NULL; /* Keep compiler happy. */
+ uint32_t *ts_ptr;
+ tcp_seq seq;
+ int error, ip_len, l;
+ uint16_t eh_type, tcp_data_len;
+
+ /* We expect a contiguous header [eh, ip, tcp]. */
+
+ eh = mtod(m, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ CURVNET_SET(lc->ifp->if_vnet);
+ if (V_ip6_forwarding != 0) {
+ /* XXX-BZ stats but changing lro_ctrl is a problem. */
+ CURVNET_RESTORE();
+ return (TCP_LRO_CANNOT);
+ }
+ CURVNET_RESTORE();
+ l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
+ error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
+ if (error != 0)
+ return (error);
+ tcp_data_len = ntohs(ip6->ip6_plen);
+ ip_len = sizeof(*ip6) + tcp_data_len;
+ break;
+ }
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ CURVNET_SET(lc->ifp->if_vnet);
+ if (V_ipforwarding != 0) {
+ /* XXX-BZ stats but changing lro_ctrl is a problem. */
+ CURVNET_RESTORE();
+ return (TCP_LRO_CANNOT);
+ }
+ CURVNET_RESTORE();
+ l3hdr = ip4 = (struct ip *)(eh + 1);
+ error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
+ if (error != 0)
+ return (error);
+ ip_len = ntohs(ip4->ip_len);
+ tcp_data_len = ip_len - sizeof(*ip4);
+ break;
+ }
+#endif
+ /* XXX-BZ what happens in case of VLAN(s)? */
+ default:
+ return (TCP_LRO_NOT_SUPPORTED);
+ }
+
+ /*
+ * If the frame is padded beyond the end of the IP packet, then we must
+ * trim the extra bytes off.
*/
- tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->ip = ip;
- /* record timestamp if it is present */
- if (opt_bytes) {
- lro->timestamp = 1;
- lro->tsval = ntohl(*(ts_ptr + 1));
- lro->tsecr = *(ts_ptr + 2);
+ l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
+ if (l != 0) {
+ if (l < 0)
+ /* Truncated packet. */
+ return (TCP_LRO_CANNOT);
+
+ m_adj(m, -l);
}
- lro->len = tot_len;
- lro->m_head = m_head;
- lro->m_tail = m_tail;
- return 0;
+
+ /*
+ * Check TCP header constraints.
+ */
+ /* Ensure no bits set besides ACK or PSH. */
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
+ return (TCP_LRO_CANNOT);
+
+	/* XXX-BZ We lose an ACK|PUSH flag concatenating multiple segments. */
+ /* XXX-BZ Ideally we'd flush on PUSH? */
+
+ /*
+ * Check for timestamps.
+ * Since the only option we handle are timestamps, we only have to
+ * handle the simple case of aligned timestamps.
+ */
+ l = (th->th_off << 2);
+ tcp_data_len -= l;
+ l -= sizeof(*th);
+ ts_ptr = (uint32_t *)(th + 1);
+ if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
+ return (TCP_LRO_CANNOT);
+
+ /* If the driver did not pass in the checksum, set it now. */
+ if (csum == 0x0000)
+ csum = th->th_sum;
+
+ seq = ntohl(th->th_seq);
+
+ /* Try to find a matching previous segment. */
+ SLIST_FOREACH(le, &lc->lro_active, next) {
+ if (le->eh_type != eh_type)
+ continue;
+ if (le->source_port != th->th_sport ||
+ le->dest_port != th->th_dport)
+ continue;
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if (bcmp(&le->source_ip6, &ip6->ip6_src,
+ sizeof(struct in6_addr)) != 0 ||
+ bcmp(&le->dest_ip6, &ip6->ip6_dst,
+ sizeof(struct in6_addr)) != 0)
+ continue;
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ if (le->source_ip4 != ip4->ip_src.s_addr ||
+ le->dest_ip4 != ip4->ip_dst.s_addr)
+ continue;
+ break;
+#endif
+ }
+
+ /* Flush now if appending will result in overflow. */
+ if (le->p_len > (65535 - tcp_data_len)) {
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ break;
+ }
+
+ /* Try to append the new segment. */
+ if (__predict_false(seq != le->next_seq ||
+ (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
+ /* Out of order packet or duplicate ACK. */
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ return (TCP_LRO_CANNOT);
+ }
+
+ if (l != 0) {
+ uint32_t tsval = ntohl(*(ts_ptr + 1));
+ /* Make sure timestamp values are increasing. */
+ /* XXX-BZ flip and use TSTMP_GEQ macro for this? */
+ if (__predict_false(le->tsval > tsval ||
+ *(ts_ptr + 2) == 0))
+ return (TCP_LRO_CANNOT);
+ le->tsval = tsval;
+ le->tsecr = *(ts_ptr + 2);
+ }
+
+ le->next_seq += tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ le->append_cnt++;
+
+#ifdef TCP_LRO_UPDATE_CSUM
+ le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
+ tcp_data_len, ~csum);
+#endif
+
+ if (tcp_data_len == 0) {
+ m_freem(m);
+ return (0);
+ }
+
+ le->p_len += tcp_data_len;
+
+ /*
+ * Adjust the mbuf so that m_data points to the first byte of
+ * the ULP payload. Adjust the mbuf to avoid complications and
+ * append new segment to existing mbuf chain.
+ */
+ m_adj(m, m->m_pkthdr.len - tcp_data_len);
+ m->m_flags &= ~M_PKTHDR;
+
+ le->m_tail->m_next = m;
+ le->m_tail = m_last(m);
+
+ /*
+ * If a possible next full length packet would cause an
+ * overflow, pro-actively flush now.
+ */
+ if (le->p_len > (65535 - lc->ifp->if_mtu)) {
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ }
+
+ return (0);
+ }
+
+ /* Try to find an empty slot. */
+ if (SLIST_EMPTY(&lc->lro_free))
+ return (TCP_LRO_CANNOT);
+
+ /* Start a new segment chain. */
+ le = SLIST_FIRST(&lc->lro_free);
+ SLIST_REMOVE_HEAD(&lc->lro_free, next);
+ SLIST_INSERT_HEAD(&lc->lro_active, le, next);
+
+ /* Start filling in details. */
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ le->le_ip6 = ip6;
+ le->source_ip6 = ip6->ip6_src;
+ le->dest_ip6 = ip6->ip6_dst;
+ le->eh_type = eh_type;
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ le->le_ip4 = ip4;
+ le->source_ip4 = ip4->ip_src.s_addr;
+ le->dest_ip4 = ip4->ip_dst.s_addr;
+ le->eh_type = eh_type;
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
+ break;
+#endif
+ }
+ le->source_port = th->th_sport;
+ le->dest_port = th->th_dport;
+
+ le->next_seq = seq + tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ if (l != 0) {
+ le->timestamp = 1;
+ le->tsval = ntohl(*(ts_ptr + 1));
+ le->tsecr = *(ts_ptr + 2);
+ }
+
+#ifdef TCP_LRO_UPDATE_CSUM
+ /*
+ * Do not touch the csum of the first packet. However save the
+ * "adjusted" checksum of just the source and destination addresses,
+ * the next header and the TCP payload. The length and TCP header
+ * parts may change, so we remove those from the saved checksum and
+ * re-add with final values on tcp_lro_flush() if needed.
+ */
+ KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
+ __func__, le, le->ulp_csum));
+
+ le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
+ ~csum);
+ th->th_sum = csum; /* Restore checksum on first packet. */
+#endif
+
+ le->m_head = m;
+ le->m_tail = m_last(m);
+
+ return (0);
}
+
+/* end */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_lro.h
--- a/head/sys/netinet/tcp_lro.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_lro.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,67 +1,75 @@
-/*******************************************************************************
+/*-
+ * Copyright (c) 2006, Myricom Inc.
+ * Copyright (c) 2008, Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/netinet/tcp_lro.h 235944 2012-05-24 23:03:23Z bz $
+ */
-Copyright (c) 2006, Myricom Inc.
-Copyright (c) 2008, Intel Corporation.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Myricom Inc, nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- 2. Neither the name of the Intel Corporation, nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
#ifndef _TCP_LRO_H_
#define _TCP_LRO_H_
-struct lro_entry;
struct lro_entry
{
- SLIST_ENTRY(lro_entry) next;
- struct mbuf *m_head;
- struct mbuf *m_tail;
- int timestamp;
- struct ip *ip;
- uint32_t tsval;
- uint32_t tsecr;
- uint32_t source_ip;
- uint32_t dest_ip;
- uint32_t next_seq;
- uint32_t ack_seq;
- uint32_t len;
- uint32_t data_csum;
- uint16_t window;
- uint16_t source_port;
- uint16_t dest_port;
- uint16_t append_cnt;
- uint16_t mss;
-
+ SLIST_ENTRY(lro_entry) next;
+ struct mbuf *m_head;
+ struct mbuf *m_tail;
+ union {
+ struct ip *ip4;
+ struct ip6_hdr *ip6;
+ } leip;
+ union {
+ in_addr_t s_ip4;
+ struct in6_addr s_ip6;
+ } lesource;
+ union {
+ in_addr_t d_ip4;
+ struct in6_addr d_ip6;
+ } ledest;
+ uint16_t source_port;
+ uint16_t dest_port;
+ uint16_t eh_type; /* EthernetHeader type. */
+ uint16_t append_cnt;
+ uint32_t p_len; /* IP header payload length. */
+ uint32_t ulp_csum; /* TCP, etc. checksum. */
+ uint32_t next_seq; /* tcp_seq */
+ uint32_t ack_seq; /* tcp_seq */
+ uint32_t tsval;
+ uint32_t tsecr;
+ uint16_t window;
+ uint16_t timestamp; /* flag, not a TCP hdr field. */
};
SLIST_HEAD(lro_head, lro_entry);
+#define le_ip4 leip.ip4
+#define le_ip6 leip.ip6
+#define source_ip4 lesource.s_ip4
+#define dest_ip4 ledest.d_ip4
+#define source_ip6 lesource.s_ip6
+#define dest_ip6 ledest.d_ip6
+
+/* NB: This is part of driver structs. */
struct lro_ctrl {
struct ifnet *ifp;
int lro_queued;
@@ -73,13 +81,12 @@
struct lro_head lro_free;
};
-
int tcp_lro_init(struct lro_ctrl *);
void tcp_lro_free(struct lro_ctrl *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
-/* Number of LRO entries - these are per rx queue */
-#define LRO_ENTRIES 8
+#define TCP_LRO_CANNOT -1
+#define TCP_LRO_NOT_SUPPORTED 1
#endif /* _TCP_LRO_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_offload.c
--- a/head/sys/netinet/tcp_offload.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_offload.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,145 +1,176 @@
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_offload.c 237263 2012-06-19 07:34:13Z np $");
+
+#include "opt_inet.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
#include <net/route.h>
-#include <net/vnet.h>
-
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
-uint32_t toedev_registration_count;
+int registered_toedevs;
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
- struct toedev *tdev;
+ struct toedev *tod;
struct rtentry *rt;
- int error;
+ int error = EOPNOTSUPP;
- if (toedev_registration_count == 0)
- return (EINVAL);
-
- /*
- * Look up the route used for the connection to
- * determine if it uses an interface capable of
- * offloading the connection.
- */
- rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
- if (rt)
+ INP_WLOCK_ASSERT(sotoinpcb(so));
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: called with sa_family %d", __func__, nam->sa_family));
+
+ if (registered_toedevs == 0)
+ return (error);
+
+ rt = rtalloc1(nam, 0, 0);
+ if (rt)
RT_UNLOCK(rt);
- else
+ else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
- if ((ifp->if_capenable & IFCAP_TOE) == 0) {
- error = EINVAL;
- goto fail;
- }
-
- tdev = TOEDEV(ifp);
- if (tdev == NULL) {
- error = EPERM;
- goto fail;
- }
-
- if (tdev->tod_can_offload(tdev, so) == 0) {
- error = EPERM;
- goto fail;
- }
-
- return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+ if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+ goto done;
+ if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+ goto done;
+
+ tod = TOEDEV(ifp);
+ if (tod != NULL)
+ error = tod->tod_connect(tod, so, rt, nam);
+done:
RTFREE(rt);
return (error);
}
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
-/*
- * This file contains code as a short-term staging area before it is moved in
- * to sys/netinet/tcp_offload.c
- */
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
{
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
}
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
{
+ struct toedev *tod = tp->tod;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_close(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ tod->tod_input(tod, tp, m);
}
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
{
+ struct toedev *tod = tp->tod;
+ int error, flags;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_drop(tp, error);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ flags = tcp_outflags[tp->t_state];
+
+ if (flags & TH_RST) {
+ /* XXX: avoid repeated calls like we do for FIN */
+ error = tod->tod_send_rst(tod, tp);
+ } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+ (tp->t_flags & TF_SENTFIN) == 0) {
+ error = tod->tod_send_fin(tod, tp);
+ if (error == 0)
+ tp->t_flags |= TF_SENTFIN;
+ } else
+ error = tod->tod_output(tod, tp);
+
+ return (error);
}
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_rcvd(tod, tp);
+}
+
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
+}
+
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_pcb_detach(tod, tp);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_offload.h
--- a/head/sys/netinet/tcp_offload.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_offload.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,30 +1,30 @@
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
+ * $FreeBSD: head/sys/netinet/tcp_offload.h 237263 2012-06-19 07:34:13Z np $
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,321 +34,15 @@
#error "no user-serviceable parts inside"
#endif
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the
- * the entire connection from set up to teardown, with some provision
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- * - tells the driver that new data may have been added to the
- * socket's send buffer - the driver should not fail if the
- * buffer is in fact unchanged
- * - the driver is responsible for providing credits (bytes in the send window)
- * back to the socket by calling sbdrop() as segments are acknowledged.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_rcvd
- * - returns credits to the driver and triggers window updates
- * to the peer (a credit as used here is a byte in the peer's receive window)
- * - the driver is expected to determine how many bytes have been
- * consumed and credit that back to the card so that it can grow
- * the window again by maintaining its own state between invocations.
- * - In principle this could be used to shrink the window as well as
- * grow the window, although it is not used for that now.
- * - this function needs to correctly handle being called any number of
- * times without any bytes being consumed from the receive buffer.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_disconnect
- * - tells the driver to send FIN to peer
- * - driver is expected to send the remaining data and then do a clean half close
- * - disconnect implies at least half-close so only send, reset, and detach
- * are legal
- * - the driver is expected to handle transition through the shutdown
- * state machine and allow the stack to support SO_LINGER.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_reset
- * - closes the connection and sends a RST to peer
- * - driver is expectd to trigger an RST and detach the toepcb
- * - no further calls are legal after reset
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * The following fields in the tcpcb are expected to be referenced by the driver:
- * + iss
- * + rcv_nxt
- * + rcv_wnd
- * + snd_isn
- * + snd_max
- * + snd_nxt
- * + snd_una
- * + t_flags
- * + t_inpcb
- * + t_maxseg
- * + t_toe
- *
- * The following fields in the inpcb are expected to be referenced by the driver:
- * + inp_lport
- * + inp_fport
- * + inp_laddr
- * + inp_fport
- * + inp_socket
- * + inp_ip_tos
- *
- * The following fields in the socket are expected to be referenced by the
- * driver:
- * + so_comp
- * + so_error
- * + so_linger
- * + so_options
- * + so_rcv
- * + so_snd
- * + so_state
- * + so_timeo
- *
- * These functions all return 0 on success and can return the following errors
- * as appropriate:
- * + EPERM:
- * + ENOBUFS: memory allocation failed
- * + EMSGSIZE: MTU changed during the call
- * + EHOSTDOWN:
- * + EHOSTUNREACH:
- * + ENETDOWN:
- * * ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- * - tells driver that the socket is going away so disconnect
- * the toepcb and free appropriate resources
- * - allows the driver to cleanly handle the case of connection state
- * outliving the socket
- * - no further calls are legal after detach
- * - the driver is expected to provide its own synchronization between
- * detach and receiving new data.
- *
- * + tu_syncache_event
- * - even if it is not actually needed, the driver is expected to
- * call syncache_add for the initial SYN and then syncache_expand
- * for the SYN,ACK
- * - tells driver that a connection either has not been added or has
- * been dropped from the syncache
- * - the driver is expected to maintain state that lives outside the
- * software stack so the syncache needs to be able to notify the
- * toe driver that the software stack is not going to create a connection
- * for a received SYN
- * - The driver is responsible for any synchronization required between
- * the syncache dropping an entry and the driver processing the SYN,ACK.
- *
- */
-struct toe_usrreqs {
- int (*tu_send)(struct tcpcb *tp);
- int (*tu_rcvd)(struct tcpcb *tp);
- int (*tu_disconnect)(struct tcpcb *tp);
- int (*tu_reset)(struct tcpcb *tp);
- void (*tu_detach)(struct tcpcb *tp);
- void (*tu_syncache_event)(int event, void *toep);
-};
+extern int registered_toedevs;
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
- u_int64_t to_flags; /* see tcpopt in tcp_var.h */
- u_int16_t to_mss; /* maximum segment size */
- u_int8_t to_wscale; /* window scaling */
+int tcp_offload_connect(struct socket *, struct sockaddr *);
+void tcp_offload_listen_start(struct tcpcb *);
+void tcp_offload_listen_stop(struct tcpcb *);
+void tcp_offload_input(struct tcpcb *, struct mbuf *);
+int tcp_offload_output(struct tcpcb *);
+void tcp_offload_rcvd(struct tcpcb *);
+void tcp_offload_ctloutput(struct tcpcb *, int, int);
+void tcp_offload_detach(struct tcpcb *);
- u_int8_t _pad1; /* explicit pad for 64bit alignment */
- u_int32_t _pad2; /* explicit pad for 64bit alignment */
- u_int64_t _pad3[4]; /* TBD */
-};
-
-#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
-#define TOE_SC_DROP 2 /* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
-
-EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
-EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
-
-/*
- * Check if the socket can be offloaded by the following steps:
- * - determine the egress interface
- * - check the interface for TOE capability and TOE is enabled
- * - check if the device has resources to offload the connection
- */
-int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
-
-/*
- * The tcp_output_* routines are wrappers around the toe_usrreqs calls
- * which trigger packet transmission. In the non-offloaded case they
- * translate to tcp_output. The tcp_offload_* routines notify TOE
- * of specific events. I the non-offloaded case they are no-ops.
- *
- * Listen is a special case because it is a 1 to many relationship
- * and there can be more than one offload driver in the system.
- */
-
-/*
- * Connection is offloaded
- */
-#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
-
-/*
- * hackish way of allowing this file to also be included by TOE
- * which needs to be kept ignorant of socket implementation details
- */
-#ifdef _SYS_SOCKETVAR_H_
-/*
- * The socket has not been marked as "do not offload"
- */
-#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
-
-static __inline int
-tcp_output_connect(struct socket *so, struct sockaddr *nam)
-{
- struct tcpcb *tp = sototcpcb(so);
- int error;
-
- /*
- * If offload has been disabled for this socket or the
- * connection cannot be offloaded just call tcp_output
- * to start the TCP state machine.
- */
-#ifndef TCP_OFFLOAD_DISABLE
- if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
-#endif
- error = tcp_output(tp);
- return (error);
-}
-
-static __inline int
-tcp_output_send(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_send(tp));
#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_rcvd(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_rcvd(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_disconnect(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_disconnect(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_reset(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_reset(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline void
-tcp_offload_detach(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- tp->t_tu->tu_detach(tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_open(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
- EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_close(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
-#endif
-}
-#undef SO_OFFLOADABLE
-#endif /* _SYS_SOCKETVAR_H_ */
-#undef tp_offload
-
-void tcp_offload_twstart(struct tcpcb *tp);
-struct tcpcb *tcp_offload_close(struct tcpcb *tp);
-struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
-
-#endif /* _NETINET_TCP_OFFLOAD_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_output.c
--- a/head/sys/netinet/tcp_output.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_output.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_output.c 234342 2012-04-16 13:49:03Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_output.c 238516 2012-07-16 07:08:34Z glebius $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -75,6 +75,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -177,7 +180,7 @@
int idle, sendalot;
int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
- int tso;
+ int tso, mtu;
struct tcpopt to;
#if 0
int maxburst = TCP_MAXBURST;
@@ -191,6 +194,11 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return (tcp_offload_output(tp));
+#endif
+
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -218,6 +226,7 @@
tcp_sack_adjust(tp);
sendalot = 0;
tso = 0;
+ mtu = 0;
off = tp->snd_nxt - tp->snd_una;
sendwin = min(tp->snd_wnd, tp->snd_cwnd);
@@ -1047,19 +1056,24 @@
* checksum extended header and data.
*/
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
- if (isipv6)
+ if (isipv6) {
/*
* ip6_plen is not need to be filled now, and will be filled
* in ip6_output.
*/
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen + len);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
+ optlen + len, IPPROTO_TCP, 0);
+ }
+#endif
+#if defined(INET6) && defined(INET)
else
-#endif /* INET6 */
+#endif
+#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
@@ -1067,6 +1081,7 @@
KASSERT(ip->ip_v == IPVERSION,
("%s: IP version incorrect: %d", __func__, ip->ip_v));
}
+#endif
/*
* Enable TSO and specify the size of the segments.
@@ -1195,6 +1210,9 @@
*/
#ifdef INET6
if (isipv6) {
+ struct route_in6 ro;
+
+ bzero(&ro, sizeof(ro));
/*
* we separately set hoplimit for every segment, since the
* user might want to change the value via setsockopt.
@@ -1204,10 +1222,13 @@
ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
/* TODO: IPv6 IP6TOS_ECT bit on */
- error = ip6_output(m,
- tp->t_inpcb->in6p_outputopts, NULL,
- ((so->so_options & SO_DONTROUTE) ?
- IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb);
+ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
+ ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
+ NULL, NULL, tp->t_inpcb);
+
+ if (error == EMSGSIZE && ro.ro_rt != NULL)
+ mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ RO_RTFREE(&ro);
}
#endif /* INET6 */
#if defined(INET) && defined(INET6)
@@ -1215,6 +1236,9 @@
#endif
#ifdef INET
{
+ struct route ro;
+
+ bzero(&ro, sizeof(ro));
ip->ip_len = m->m_pkthdr.len;
#ifdef INET6
if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
@@ -1231,9 +1255,13 @@
if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
ip->ip_off |= IP_DF;
- error = ip_output(m, tp->t_inpcb->inp_options, NULL,
+ error = ip_output(m, tp->t_inpcb->inp_options, &ro,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
tp->t_inpcb);
+
+ if (error == EMSGSIZE && ro.ro_rt != NULL)
+ mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ RO_RTFREE(&ro);
}
#endif /* INET */
if (error) {
@@ -1280,21 +1308,18 @@
* For some reason the interface we used initially
* to send segments changed to another or lowered
* its MTU.
- *
- * tcp_mtudisc() will find out the new MTU and as
- * its last action, initiate retransmission, so it
- * is important to not do so here.
- *
* If TSO was active we either got an interface
* without TSO capabilits or TSO was turned off.
- * Disable it for this connection as too and
- * immediatly retry with MSS sized segments generated
- * by this function.
+ * If we obtained mtu from ip_output() then update
+ * it and try again.
*/
if (tso)
tp->t_flags &= ~TF_TSO;
- tcp_mtudisc(tp->t_inpcb, -1);
- return (0);
+ if (mtu != 0) {
+ tcp_mss_update(tp, -1, mtu, NULL, NULL);
+ goto again;
+ }
+ return (error);
case EHOSTDOWN:
case EHOSTUNREACH:
case ENETDOWN:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_subr.c
--- a/head/sys/netinet/tcp_subr.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_subr.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_subr.c 234342 2012-04-16 13:49:03Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_subr.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_compat.h"
#include "opt_inet.h"
@@ -85,7 +85,6 @@
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -96,6 +95,9 @@
#ifdef INET6
#include <netinet6/ip6protosw.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -573,8 +575,7 @@
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
- ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
- tlen));
+ ip6->ip6_plen = 0; /* Set in ip6_output(). */
tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
}
#endif
@@ -619,12 +620,13 @@
else
nth->th_win = htons((u_short)win);
nth->th_urp = 0;
+
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- nth->th_sum = 0;
- nth->th_sum = in6_cksum(m, IPPROTO_TCP,
- sizeof(struct ip6_hdr),
- tlen - sizeof(struct ip6_hdr));
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ nth->th_sum = in6_cksum_pseudo(ip6,
+ tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
NULL, NULL);
}
@@ -634,10 +636,9 @@
#endif
#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
}
#endif /* INET */
#ifdef TCPDEBUG
@@ -825,7 +826,7 @@
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
- (void) tcp_output_reset(tp);
+ (void) tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -925,8 +926,12 @@
/* free the reassembly queue, if any */
tcp_reass_flush(tp);
+
+#ifdef TCP_OFFLOAD
/* Disconnect offload device, if any. */
- tcp_offload_detach(tp);
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_detach(tp);
+#endif
tcp_free_sackholes(tp);
@@ -955,9 +960,10 @@
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
- /* Notify any offload devices of listener close */
+#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
- tcp_offload_listen_close(tp);
+ tcp_offload_listen_stop(tp);
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@@ -1696,7 +1702,7 @@
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output_send(tp);
+ tcp_output(tp);
return (inp);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_syncache.c
--- a/head/sys/netinet/tcp_syncache.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_syncache.c Wed Jul 25 16:40:53 2012 +0300
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_syncache.c 231767 2012-02-15 16:09:56Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_syncache.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -81,10 +81,12 @@
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/toecore.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -110,10 +112,8 @@
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-#ifdef TCP_OFFLOAD_DISABLE
-#define TOEPCB_ISSET(sc) (0)
-#else
-#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#ifdef TCP_OFFLOAD
+#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
@@ -332,6 +332,14 @@
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_added(tod, sc->sc_todctx);
+ }
+#endif
+
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@@ -356,10 +364,14 @@
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
-#endif
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
+
syncache_free(sc);
V_tcp_syncache.cache_count--;
}
@@ -846,6 +858,18 @@
if (sc->sc_rxmits > 1)
tp->snd_cwnd = tp->t_maxseg;
+#ifdef TCP_OFFLOAD
+ /*
+ * Allow a TOE driver to install its hooks. Note that we hold the
+ * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
+ * new connection before the TOE driver has done its thing.
+ */
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_offload_socket(tod, sc->sc_todctx, so);
+ }
+#endif
/*
* Copy and activate timers.
*/
@@ -926,6 +950,13 @@
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
V_tcp_syncache.cache_count--;
SCH_UNLOCK(sch);
}
@@ -934,7 +965,7 @@
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
- if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+ if (th->th_ack != sc->sc_iss + 1) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -945,9 +976,8 @@
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
- if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
- !TOEPCB_ISSET(sc)) {
+ if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -964,8 +994,7 @@
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
- if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
- !TOEPCB_ISSET(sc)) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@@ -993,25 +1022,6 @@
return (0);
}
-int
-tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m)
-{
- struct tcpopt to;
- int rc;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- rc = syncache_expand(inc, &to, th, lsop, m);
- INP_INFO_WUNLOCK(&V_tcbinfo);
-
- return (rc);
-}
-
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1025,10 +1035,10 @@
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
-static void
-_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m,
- struct toe_usrreqs *tu, void *toepcb)
+void
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ void *todctx)
{
struct tcpcb *tp;
struct socket *so;
@@ -1114,11 +1124,6 @@
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
- sc->sc_toepcb);
-#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1151,7 +1156,7 @@
s, __func__);
free(s, M_TCPLOG);
}
- if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1202,9 +1207,9 @@
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
-#ifndef TCP_OFFLOAD_DISABLE
- sc->sc_tu = tu;
- sc->sc_toepcb = toepcb;
+#ifdef TCP_OFFLOAD
+ sc->sc_tod = tod;
+ sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@@ -1299,7 +1304,7 @@
/*
* Do a standard 3-way handshake.
*/
- if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1473,11 +1478,12 @@
optlen = 0;
M_SETFIB(m, sc->sc_inc.inc_fibnum);
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
- th->th_sum = 0;
- th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
- tlen + optlen - hlen);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
+ IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
@@ -1487,41 +1493,24 @@
#endif
#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
return (error);
}
-void
-syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m)
-{
- _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
-}
-
-void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
- struct toe_usrreqs *tu, void *toepcb)
-{
- struct tcpopt to;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
-
- _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
-}
-
/*
* The purpose of SYN cookies is to avoid keeping track of all SYN's we
* receive and to be able to handle SYN floods from bogus source addresses
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_syncache.h
--- a/head/sys/netinet/tcp_syncache.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_syncache.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,15 +27,13 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: head/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $
+ * $FreeBSD: head/sys/netinet/tcp_syncache.h 237263 2012-06-19 07:34:13Z np $
*/
#ifndef _NETINET_TCP_SYNCACHE_H_
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
-struct toeopt;
-
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@@ -43,14 +41,9 @@
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m);
void syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
- struct tcphdr *, struct inpcb *, struct socket **,
- struct toe_usrreqs *tu, void *toepcb);
-
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@@ -75,10 +68,10 @@
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
-#ifndef TCP_OFFLOAD_DISABLE
- struct toe_usrreqs *sc_tu; /* TOE operations */
- void *sc_toepcb; /* TOE protocol block */
-#endif
+#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
+ struct toedev *sc_tod; /* entry added by this TOE */
+ void *sc_todctx; /* TOE driver context */
+#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_timer.c
--- a/head/sys/netinet/tcp_timer.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_timer.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 231025 2012-02-05 16:53:02Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -602,6 +602,11 @@
struct inpcb *inp = tp->t_inpcb;
int cpu = INP_CPU(inp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return;
+#endif
+
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_timewait.c
--- a/head/sys/netinet/tcp_timewait.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_timewait.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_timewait.c 231767 2012-02-15 16:09:56Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_timewait.c 236170 2012-05-28 09:30:13Z bz $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -574,10 +574,12 @@
th->th_flags = flags;
th->th_win = htons(tw->last_win);
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6,
+ sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
error = ip6_output(m, inp->in6p_outputopts, NULL,
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
@@ -588,10 +590,9 @@
#endif
#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
ip->ip_len = m->m_pkthdr.len;
if (V_path_mtu_discovery)
ip->ip_off |= IP_DF;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_usrreq.c
--- a/head/sys/netinet/tcp_usrreq.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_usrreq.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/tcp_usrreq.c 231025 2012-02-05 16:53:02Z glebius $");
+__FBSDID("$FreeBSD: head/sys/netinet/tcp_usrreq.c 237263 2012-06-19 07:34:13Z np $");
#include "opt_ddb.h"
#include "opt_inet.h"
@@ -87,7 +87,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
+#endif
/*
* TCP protocol interface to socket abstraction.
@@ -367,7 +369,9 @@
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
- tcp_offload_listen_open(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
@@ -409,6 +413,9 @@
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
@@ -459,7 +466,13 @@
TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -519,7 +532,12 @@
goto out;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ error = tcp_output(tp);
goto out;
}
#endif
@@ -530,7 +548,13 @@
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -709,7 +733,7 @@
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output_disconnect(tp);
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -739,7 +763,11 @@
}
tp = intotcpcb(inp);
TCPDEBUG1();
- tcp_output_rcvd(tp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_rcvd(tp);
+#endif
+ tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -835,7 +863,7 @@
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -884,7 +912,7 @@
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
out:
@@ -1119,7 +1147,6 @@
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1192,7 +1219,6 @@
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1323,9 +1349,9 @@
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
#endif /* TCP_SIGNATURE */
+
case TCP_NODELAY:
case TCP_NOOPT:
INP_WUNLOCK(inp);
@@ -1351,6 +1377,13 @@
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
+unlock_and_done:
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
INP_WUNLOCK(inp);
break;
@@ -1369,8 +1402,7 @@
if (TCPS_HAVEESTABLISHED(tp->t_state))
error = tcp_output(tp);
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
@@ -1385,8 +1417,7 @@
tp->t_maxseg = optval;
else
error = EINVAL;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_INFO:
INP_WUNLOCK(inp);
@@ -1438,8 +1469,7 @@
}
}
CC_LIST_RUNLOCK();
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
@@ -1491,8 +1521,7 @@
TP_KEEPINIT(tp));
break;
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
default:
INP_WUNLOCK(inp);
@@ -1635,7 +1664,7 @@
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output_disconnect(tp);
+ tcp_output(tp);
}
}
@@ -1658,7 +1687,9 @@
switch (tp->t_state) {
case TCPS_LISTEN:
- tcp_offload_listen_close(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_stop(tp);
+#endif
/* FALLTHROUGH */
case TCPS_CLOSED:
tp->t_state = TCPS_CLOSED;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/tcp_var.h
--- a/head/sys/netinet/tcp_var.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/tcp_var.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: head/sys/netinet/tcp_var.h 234342 2012-04-16 13:49:03Z glebius $
+ * $FreeBSD: head/sys/netinet/tcp_var.h 237263 2012-06-19 07:34:13Z np $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -194,7 +194,7 @@
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
- struct toe_usrreqs *t_tu; /* offload operations vector */
+ struct toedev *tod; /* toedev handling this connection */
int t_sndrexmitpack; /* retransmit packets sent */
int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/toecore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/netinet/toecore.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,575 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/toecore.c 237263 2012-06-19 07:34:13Z np $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/sockopt.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet6/nd6.h>
+#define TCPSTATES
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
+
+static struct mtx toedev_lock;
+static TAILQ_HEAD(, toedev) toedev_list;
+static eventhandler_tag listen_start_eh;
+static eventhandler_tag listen_stop_eh;
+static eventhandler_tag lle_event_eh;
+static eventhandler_tag route_redirect_eh;
+
+static int
+toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
+ struct rtentry *rt __unused, struct sockaddr *nam __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static int
+toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static int
+toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static void
+toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return;
+}
+
+static void
+toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+static int
+toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static void
+toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+static void
+toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct sockaddr *sa __unused, uint8_t *lladdr __unused,
+ uint16_t vtag __unused)
+{
+
+ return;
+}
+
+static void
+toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
+{
+
+ return;
+}
+
+static void
+toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+static void
+toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+static int
+toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return (0);
+}
+
+static void
+toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
+ struct socket *so __unused)
+{
+
+ return;
+}
+
+static void
+toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ int sopt_dir __unused, int sopt_name __unused)
+{
+
+ return;
+}
+
+/*
+ * Inform one or more TOE devices about a listening socket.
+ */
+static void
+toe_listen_start(struct inpcb *inp, void *arg)
+{
+ struct toedev *t, *tod;
+ struct tcpcb *tp;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
+ ("%s: inp is not a TCP inp", __func__));
+
+ if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
+ return;
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_LISTEN)
+ return;
+
+ t = arg;
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link) {
+ if (t == NULL || t == tod)
+ tod->tod_listen_start(tod, tp);
+ }
+ mtx_unlock(&toedev_lock);
+}
+
+static void
+toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ toe_listen_start(inp, NULL);
+}
+
+static void
+toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct toedev *tod;
+#ifdef INVARIANTS
+ struct inpcb *inp = tp->t_inpcb;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link)
+ tod->tod_listen_stop(tod, tp);
+ mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Fill up a freshly allocated toedev struct with reasonable defaults.
+ */
+void
+init_toedev(struct toedev *tod)
+{
+
+ tod->tod_softc = NULL;
+
+ /*
+ * Provide no-op defaults so that the kernel can call any toedev
+ * function without having to check whether the TOE driver supplied one
+ * or not.
+ */
+ tod->tod_connect = toedev_connect;
+ tod->tod_listen_start = toedev_listen_start;
+ tod->tod_listen_stop = toedev_listen_stop;
+ tod->tod_input = toedev_input;
+ tod->tod_rcvd = toedev_rcvd;
+ tod->tod_output = toedev_output;
+ tod->tod_send_rst = toedev_output;
+ tod->tod_send_fin = toedev_output;
+ tod->tod_pcb_detach = toedev_pcb_detach;
+ tod->tod_l2_update = toedev_l2_update;
+ tod->tod_route_redirect = toedev_route_redirect;
+ tod->tod_syncache_added = toedev_syncache_added;
+ tod->tod_syncache_removed = toedev_syncache_removed;
+ tod->tod_syncache_respond = toedev_syncache_respond;
+ tod->tod_offload_socket = toedev_offload_socket;
+ tod->tod_ctloutput = toedev_ctloutput;
+}
+
+/*
+ * Register an active TOE device with the system. This allows it to receive
+ * notifications from the kernel.
+ */
+int
+register_toedev(struct toedev *tod)
+{
+ struct toedev *t;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(t, &toedev_list, link) {
+ if (t == tod) {
+ mtx_unlock(&toedev_lock);
+ return (EEXIST);
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&toedev_list, tod, link);
+ registered_toedevs++;
+ mtx_unlock(&toedev_lock);
+
+ inp_apply_all(toe_listen_start, tod);
+
+ return (0);
+}
+
+/*
+ * Remove the TOE device from the global list of active TOE devices. It is the
+ * caller's responsibility to ensure that the TOE device is quiesced prior to
+ * this call.
+ */
+int
+unregister_toedev(struct toedev *tod)
+{
+ struct toedev *t, *t2;
+ int rc = ENODEV;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
+ if (t == tod) {
+ TAILQ_REMOVE(&toedev_list, tod, link);
+ registered_toedevs--;
+ rc = 0;
+ break;
+ }
+ }
+ KASSERT(registered_toedevs >= 0,
+ ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
+ mtx_unlock(&toedev_lock);
+ return (rc);
+}
+
+void
+toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, void *tod, void *todctx)
+{
+ struct socket *lso = inp->inp_socket;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+}
+
+int
+toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
+ struct tcphdr *th, struct socket **lsop)
+{
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ return (syncache_expand(inc, to, th, lsop, NULL));
+}
+
+/*
+ * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP
+ * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
+ * in TIME_WAIT may be assassinated freeing it up for re-use.
+ *
+ * Note that the TCP header must have been run through tcp_fields_to_host() or
+ * equivalent.
+ */
+int
+toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ if (inc->inc_flags & INC_ISIPV6)
+ return (ENOSYS); /* XXX: implement */
+
+ inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
+ inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
+ if (inp != NULL) {
+ INP_WLOCK_ASSERT(inp);
+
+ if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
+ if (!tcp_twcheck(inp, NULL, th, NULL, 0))
+ return (EADDRINUSE);
+ } else {
+ INP_WUNLOCK(inp);
+ return (EADDRINUSE);
+ }
+ }
+
+ return (0);
+}
+
+static void
+toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+ struct toedev *tod;
+ struct ifnet *ifp;
+ struct sockaddr *sa;
+ uint8_t *lladdr;
+ uint16_t vtag;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ ifp = lle->lle_tbl->llt_ifp;
+ sa = L3_ADDR(lle);
+
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
+ __func__, evt, lle, sa));
+
+ /*
+ * Not interested if the interface's TOE capability is not enabled.
+ */
+ if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
+ (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
+ return;
+
+ tod = TOEDEV(ifp);
+ if (tod == NULL)
+ return;
+
+ vtag = 0xfff;
+ if (evt != LLENTRY_RESOLVED) {
+
+ /*
+ * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
+ * this entry is going to be deleted.
+ */
+
+ lladdr = NULL;
+ } else {
+
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: %p resolved but not valid?", __func__, lle));
+
+ lladdr = (uint8_t *)&lle->ll_addr;
+#ifdef VLAN_TAG
+ VLAN_TAG(ifp, &vtag);
+#endif
+ }
+
+ tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
+}
+
+/*
+ * XXX: implement.
+ */
+static void
+toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
+ struct rtentry *rt1, struct sockaddr *sa)
+{
+
+ return;
+}
+
+/*
+ * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means
+ * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
+ * tod_l2_update will be called later, when the entry is resolved or times out.
+ */
+int
+toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+ uint8_t *lladdr, uint16_t *vtag)
+{
+ struct llentry *lle;
+ int rc;
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ rc = nd6_storelladdr(ifp, NULL, sa, lladdr, &lle);
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+
+ if (rc == 0) {
+#ifdef VLAN_TAG
+ if (VLAN_TAG(ifp, vtag) != 0)
+#endif
+ *vtag = 0xfff;
+ }
+
+ return (rc);
+}
+
+void
+toe_connect_failed(struct toedev *tod, struct tcpcb *tp, int err)
+{
+ struct inpcb *inp = tp->t_inpcb;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_flags & TF_TOE,
+ ("%s: tp %p not offloaded.", __func__, tp));
+
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ if (err == EAGAIN) {
+
+ /*
+ * Temporary failure during offload, take this PCB back.
+ * Detach from the TOE driver and do the rest of what
+ * TCP's pru_connect would have done if the connection
+ * wasn't offloaded.
+ */
+
+ tod->tod_pcb_detach(tod, tp);
+ KASSERT(!(tp->t_flags & TF_TOE),
+ ("%s: tp %p still offloaded.", __func__, tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ (void) tcp_output(tp);
+ } else {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ tp = tcp_drop(tp, err);
+ if (tp == NULL)
+ INP_WLOCK(inp); /* re-acquire */
+ }
+ }
+ INP_WLOCK_ASSERT(inp);
+}
+
+static int
+toecore_load(void)
+{
+
+ mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
+ TAILQ_INIT(&toedev_list);
+
+ listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+ toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
+ listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+ toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
+ lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
+ EVENTHANDLER_PRI_ANY);
+ route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
+ toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
+
+ return (0);
+}
+
+static int
+toecore_unload(void)
+{
+
+ mtx_lock(&toedev_lock);
+ if (!TAILQ_EMPTY(&toedev_list)) {
+ mtx_unlock(&toedev_lock);
+ return (EBUSY);
+ }
+
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
+ EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
+
+ mtx_unlock(&toedev_lock);
+ mtx_destroy(&toedev_lock);
+
+ return (0);
+}
+
+static int
+toecore_mod_handler(module_t mod, int cmd, void *arg)
+{
+
+ if (cmd == MOD_LOAD)
+ return (toecore_load());
+
+ if (cmd == MOD_UNLOAD)
+ return (toecore_unload());
+
+ return (EOPNOTSUPP);
+}
+
+static moduledata_t mod_data= {
+ "toecore",
+ toecore_mod_handler,
+ 0
+};
+
+MODULE_VERSION(toecore, 1);
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/toecore.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/netinet/toecore.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/netinet/toecore.h 237263 2012-06-19 07:34:13Z np $
+ */
+
+#ifndef _NETINET_TOE_H_
+#define _NETINET_TOE_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct tcpopt;
+struct tcphdr;
+struct in_conninfo;
+
+struct toedev {
+ TAILQ_ENTRY(toedev) link; /* glue for toedev_list */
+ void *tod_softc; /* TOE driver private data */
+
+ /*
+ * Active open. If a failure occurs, it is reported back by the driver
+ * via toe_connect_failed.
+ */
+ int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
+ struct sockaddr *);
+
+ /* Passive open. */
+ int (*tod_listen_start)(struct toedev *, struct tcpcb *);
+ int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel uses this routine to pass on any frame it receives for an
+ * offloaded connection to the TOE driver. This is an unusual event.
+ */
+ void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
+
+ /*
+ * This is called by the kernel during pru_rcvd for an offloaded TCP
+ * connection and provides an opportunity for the TOE driver to manage
+ * its rx window and credits.
+ */
+ void (*tod_rcvd)(struct toedev *, struct tcpcb *);
+
+ /*
+ * Transmit routine. The kernel calls this to have the TOE driver
+ * evaluate whether there is data to be transmitted, and transmit it.
+ */
+ int (*tod_output)(struct toedev *, struct tcpcb *);
+
+ /* Immediate teardown: send RST to peer. */
+ int (*tod_send_rst)(struct toedev *, struct tcpcb *);
+
+ /* Initiate orderly disconnect by sending FIN to the peer. */
+ int (*tod_send_fin)(struct toedev *, struct tcpcb *);
+
+ /* Called to indicate that the kernel is done with this TCP PCB. */
+ void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel calls this once it has information about an L2 entry that
+ * the TOE driver enquired about previously (via toe_l2_resolve).
+ */
+ void (*tod_l2_update)(struct toedev *, struct ifnet *,
+ struct sockaddr *, uint8_t *, uint16_t);
+
+ /* XXX. Route has been redirected. */
+ void (*tod_route_redirect)(struct toedev *, struct ifnet *,
+ struct rtentry *, struct rtentry *);
+
+ /* Syncache interaction. */
+ void (*tod_syncache_added)(struct toedev *, void *);
+ void (*tod_syncache_removed)(struct toedev *, void *);
+ int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
+ void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
+
+ /* TCP socket option */
+ void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
+};
+
+#include <sys/eventhandler.h>
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+void init_toedev(struct toedev *);
+int register_toedev(struct toedev *);
+int unregister_toedev(struct toedev *);
+
+/*
+ * General interface for looking up L2 information for an IP address. If an
+ * answer is not available right away then the TOE driver's tod_l2_update will
+ * be called later.
+ */
+int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
+ uint8_t *, uint16_t *);
+
+void toe_connect_failed(struct toedev *, struct tcpcb *, int);
+
+void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct inpcb *, void *, void *);
+int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct socket **);
+
+int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/toedev.h
--- a/head/sys/netinet/toedev.h Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TOEDEV_H_
-#define _NETINET_TOEDEV_H_
-
-#ifndef _KERNEL
-#error "no user-serviceable parts inside"
-#endif
-
-extern uint32_t toedev_registration_count;
-
-/* Parameter values for offload_get_phys_egress(). */
-enum {
- TOE_OPEN,
- TOE_FAILOVER,
-};
-
-/* Parameter values for toe_failover(). */
-enum {
- TOE_ACTIVE_SLAVE,
- TOE_LINK_DOWN,
- TOE_LINK_UP,
- TOE_RELEASE,
- TOE_RELEASE_ALL,
-};
-
-#define TOENAMSIZ 16
-
-/* Get the toedev associated with a ifnet. */
-#define TOEDEV(ifp) ((ifp)->if_llsoftc)
-
-struct offload_id {
- unsigned int id;
- unsigned long data;
-};
-
-struct ifnet;
-struct rt_entry;
-struct tom_info;
-struct sysctl_oid;
-struct socket;
-struct mbuf;
-
-struct toedev {
- TAILQ_ENTRY(toedev) entry;
- char tod_name[TOENAMSIZ]; /* TOE device name */
- unsigned int tod_ttid; /* TOE type id */
- unsigned long tod_flags; /* device flags */
- unsigned int tod_mtu; /* max TX offloaded data */
- unsigned int tod_nconn; /* max # of offloaded
- * connections
- */
- struct ifnet *tod_lldev; /* first interface */
- const struct tom_info *tod_offload_mod; /* TCP offload module */
-
- /*
- * This TOE device is capable of offloading the connection for socket so
- */
- int (*tod_can_offload)(struct toedev *dev, struct socket *so);
-
- /*
- * Establish a connection to nam using the TOE device dev
- */
- int (*tod_connect)(struct toedev *dev, struct socket *so,
- struct rtentry *rt, struct sockaddr *nam);
- /*
- * Send an mbuf down to the toe device
- */
- int (*tod_send)(struct toedev *dev, struct mbuf *m);
- /*
- * Receive an array of mbufs from the TOE device dev
- */
- int (*tod_recv)(struct toedev *dev, struct mbuf **m, int n);
- /*
- * Device specific ioctl interface
- */
- int (*tod_ctl)(struct toedev *dev, unsigned int req, void *data);
- /*
- * Update L2 entry in toedev
- */
- void (*tod_arp_update)(struct toedev *dev, struct rtentry *neigh);
- /*
- * Failover from one toe device to another
- */
- void (*tod_failover)(struct toedev *dev, struct ifnet *bond_ifp,
- struct ifnet *ndev, int event);
- void *tod_priv; /* driver private data */
- void *tod_l2opt; /* optional layer 2 data */
- void *tod_l3opt; /* optional layer 3 data */
- void *tod_l4opt; /* optional layer 4 data */
- void *tod_ulp; /* upper lever protocol */
-};
-
-struct tom_info {
- TAILQ_ENTRY(tom_info) entry;
- int (*ti_attach)(struct toedev *dev,
- const struct offload_id *entry);
- int (*ti_detach)(struct toedev *dev);
- const char *ti_name;
- const struct offload_id *ti_id_table;
-};
-
-static __inline void
-init_offload_dev(struct toedev *dev)
-{
-}
-
-int register_tom(struct tom_info *t);
-int unregister_tom(struct tom_info *t);
-int register_toedev(struct toedev *dev, const char *name);
-int unregister_toedev(struct toedev *dev);
-int activate_offload(struct toedev *dev);
-int toe_send(struct toedev *dev, struct mbuf *m);
-void toe_arp_update(struct rtentry *rt);
-struct ifnet *offload_get_phys_egress(struct ifnet *ifp,
- struct socket *so, int context);
-int toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n);
-
-static __inline void
-toe_neigh_update(struct ifnet *ifp)
-{
-}
-
-static __inline void
-toe_failover(struct ifnet *bond_ifp, struct ifnet *fail_ifp, int event)
-{
-}
-
-static __inline int
-toe_enslave(struct ifnet *bond_ifp, struct ifnet *slave_ifp)
-{
- return (0);
-}
-
-#endif /* _NETINET_TOEDEV_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/netinet/udp_usrreq.c
--- a/head/sys/netinet/udp_usrreq.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/netinet/udp_usrreq.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/udp_usrreq.c 233554 2012-03-27 15:14:29Z bz $");
+__FBSDID("$FreeBSD: head/sys/netinet/udp_usrreq.c 236961 2012-06-12 14:56:08Z tuexen $");
#include "opt_ipfw.h"
#include "opt_inet.h"
@@ -956,6 +956,7 @@
int ipflags;
u_short fport, lport;
int unlock_udbinfo;
+ u_char tos;
/*
* udp_output() may need to temporarily bind or connect the current
@@ -971,12 +972,15 @@
}
src.sin_family = 0;
+ INP_RLOCK(inp);
+ tos = inp->inp_ip_tos;
if (control != NULL) {
/*
* XXX: Currently, we assume all the optional information is
* stored in a single mbuf.
*/
if (control->m_next) {
+ INP_RUNLOCK(inp);
m_freem(control);
m_freem(m);
return (EINVAL);
@@ -1008,6 +1012,14 @@
*(struct in_addr *)CMSG_DATA(cm);
break;
+ case IP_TOS:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
+ error = EINVAL;
+ break;
+ }
+ tos = *(u_char *)CMSG_DATA(cm);
+ break;
+
default:
error = ENOPROTOOPT;
break;
@@ -1018,6 +1030,7 @@
m_freem(control);
}
if (error) {
+ INP_RUNLOCK(inp);
m_freem(m);
return (error);
}
@@ -1039,7 +1052,6 @@
* XXXRW: Check that hash locking update here is correct.
*/
sin = (struct sockaddr_in *)addr;
- INP_RLOCK(inp);
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_RUNLOCK(inp);
@@ -1223,7 +1235,7 @@
ui->ui_sum = 0;
((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
- ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
+ ((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
if (unlock_udbinfo == UH_WLOCKED)
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/pc98/conf/GENERIC
--- a/head/sys/pc98/conf/GENERIC Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/pc98/conf/GENERIC Wed Jul 25 16:40:53 2012 +0300
@@ -16,7 +16,7 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/pc98/conf/GENERIC 233271 2012-03-21 08:38:42Z ed $
+# $FreeBSD: head/sys/pc98/conf/GENERIC 235898 2012-05-24 11:20:51Z mav $
cpu I486_CPU
cpu I586_CPU
@@ -110,7 +110,7 @@
device sa # Sequential Access (tape etc)
device cd # CD
device pass # Passthrough device (direct ATA/SCSI access)
-device ses # SCSI Environmental Services (and SAF-TE)
+device ses # Enclosure Services (SES and SAF-TE)
# keyboard driver
device pckbd # PC98 keyboard
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/pc98/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/pc98/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/pc98/include/vdso.h 237433 2012-06-22 07:06:40Z kib $ */
+
+#include <x86/vdso.h>
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/pc98/pc98/machdep.c
--- a/head/sys/pc98/pc98/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/pc98/pc98/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/pc98/pc98/machdep.c 233031 2012-03-16 12:13:44Z nyan $");
+__FBSDID("$FreeBSD: head/sys/pc98/pc98/machdep.c 238310 2012-07-09 20:42:08Z jhb $");
#include "opt_apic.h"
#include "opt_atalk.h"
@@ -73,6 +73,7 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/memrange.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
@@ -151,7 +152,6 @@
extern void printcpuinfo(void); /* XXX header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
-extern void initializecpu(void);
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
@@ -217,6 +217,8 @@
struct mtx icu_lock;
+struct mem_range_softc mem_range_softc;
+
static void
cpu_startup(dummy)
void *dummy;
@@ -271,6 +273,11 @@
bufinit();
vm_pager_bufferinit();
cpu_setregs();
+
+ /*
+ * Add BSP as an interrupt target.
+ */
+ intr_add_cpu(0);
}
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/locore32.S
--- a/head/sys/powerpc/aim/locore32.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/locore32.S Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/powerpc/aim/locore32.S 228605 2011-12-16 23:40:56Z nwhitehorn $ */
+/* $FreeBSD: head/sys/powerpc/aim/locore32.S 237737 2012-06-29 01:55:20Z rpaulo $ */
/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
/*-
@@ -164,13 +164,14 @@
bl OF_initial_setup
+ lis 3,kernel_text@ha
+ addi 3,3,kernel_text@l
+
lis 4,end@ha
addi 4,4,end@l
+ add 4,4,3
mr 5,4
- lis 3,kernel_text@ha
- addi 3,3,kernel_text@l
-
/* Restore the argument pointer and length */
mr 6,20
mr 7,21
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/locore64.S
--- a/head/sys/powerpc/aim/locore64.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/locore64.S Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/powerpc/aim/locore64.S 230400 2012-01-20 22:34:19Z andreast $ */
+/* $FreeBSD: head/sys/powerpc/aim/locore64.S 237737 2012-06-29 01:55:20Z rpaulo $ */
/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
/*-
@@ -164,13 +164,14 @@
bl OF_initial_setup
nop
+ lis 3,kernbase@ha
+ addi 3,3,kernbase@l
+
lis 4,end@ha
addi 4,4,end@l
+ add 4,4,3
mr 5,4
- lis 3,kernbase@ha
- addi 3,3,kernbase@l
-
/* Restore the argument pointer and length */
mr 6,20
mr 7,21
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/mmu_oea.c
--- a/head/sys/powerpc/aim/mmu_oea.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/mmu_oea.c Wed Jul 25 16:40:53 2012 +0300
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea.c 234156 2012-04-11 22:23:50Z nwhitehorn $");
+__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea.c 238357 2012-07-10 22:10:21Z alc $");
/*
* Manages physical address maps.
@@ -125,6 +125,7 @@
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@@ -204,6 +205,17 @@
struct pvo_head moea_pvo_kunmanaged =
LIST_HEAD_INITIALIZER(moea_pvo_kunmanaged); /* list of unmanaged pages */
+/*
+ * Isolate the global pv list lock from data and other locks to prevent false
+ * sharing within the cache.
+ */
+static struct {
+ struct rwlock lock;
+ char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
+} pvh_global __aligned(CACHE_LINE_SIZE);
+
+#define pvh_global_lock pvh_global.lock
+
uma_zone_t moea_upvo_zone; /* zone for pvo entries for unmanaged pages */
uma_zone_t moea_mpvo_zone; /* zone for pvo entries for managed pages */
@@ -288,8 +300,8 @@
boolean_t moea_is_modified(mmu_t, vm_page_t);
boolean_t moea_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
boolean_t moea_is_referenced(mmu_t, vm_page_t);
-boolean_t moea_ts_referenced(mmu_t, vm_page_t);
-vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
+int moea_ts_referenced(mmu_t, vm_page_t);
+vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t);
int moea_page_wired_mappings(mmu_t, vm_page_t);
void moea_pinit(mmu_t, pmap_t);
@@ -308,14 +320,14 @@
void moea_deactivate(mmu_t, struct thread *);
void moea_cpu_bootstrap(mmu_t, int);
void moea_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
-void *moea_mapdev(mmu_t, vm_offset_t, vm_size_t);
+void *moea_mapdev(mmu_t, vm_paddr_t, vm_size_t);
void *moea_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t);
void moea_unmapdev(mmu_t, vm_offset_t, vm_size_t);
-vm_offset_t moea_kextract(mmu_t, vm_offset_t);
+vm_paddr_t moea_kextract(mmu_t, vm_offset_t);
void moea_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t);
-void moea_kenter(mmu_t, vm_offset_t, vm_offset_t);
+void moea_kenter(mmu_t, vm_offset_t, vm_paddr_t);
void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma);
-boolean_t moea_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
+boolean_t moea_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
static void moea_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);
static mmu_method_t moea_methods[] = {
@@ -455,7 +467,7 @@
moea_attr_clear(vm_page_t m, int ptebit)
{
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
m->md.mdpg_attrs &= ~ptebit;
}
@@ -470,7 +482,7 @@
moea_attr_save(vm_page_t m, int ptebit)
{
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
m->md.mdpg_attrs |= ptebit;
}
@@ -857,7 +869,12 @@
for (i = 0; i < 16; i++)
kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
CPU_FILL(&kernel_pmap->pm_active);
- LIST_INIT(&kernel_pmap->pmap_pvo);
+ RB_INIT(&kernel_pmap->pmap_pvo);
+
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
/*
* Set up the Open Firmware mappings
@@ -1066,10 +1083,10 @@
boolean_t wired)
{
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
moea_enter_locked(pmap, va, m, prot, wired);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -1102,7 +1119,7 @@
pvo_flags = PVO_MANAGED;
}
if (pmap_bootstrapped)
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
VM_OBJECT_LOCKED(m->object),
@@ -1166,14 +1183,14 @@
psize = atop(end - start);
m = m_start;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pm);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
moea_enter_locked(pm, start + ptoa(diff), m, prot &
(VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
m = TAILQ_NEXT(m, listq);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1182,11 +1199,11 @@
vm_prot_t prot)
{
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pm);
moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
FALSE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1252,15 +1269,20 @@
boolean_t
moea_is_referenced(mmu_t mmu, vm_page_t m)
{
+ boolean_t rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea_is_referenced: page %p is not managed", m));
- return (moea_query_bit(m, PTE_REF));
+ rw_wlock(&pvh_global_lock);
+ rv = moea_query_bit(m, PTE_REF);
+ rw_wunlock(&pvh_global_lock);
+ return (rv);
}
boolean_t
moea_is_modified(mmu_t mmu, vm_page_t m)
{
+ boolean_t rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea_is_modified: page %p is not managed", m));
@@ -1274,7 +1296,10 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
- return (moea_query_bit(m, PTE_CHG));
+ rw_wlock(&pvh_global_lock);
+ rv = moea_query_bit(m, PTE_CHG);
+ rw_wunlock(&pvh_global_lock);
+ return (rv);
}
boolean_t
@@ -1296,7 +1321,9 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea_clear_reference: page %p is not managed", m));
+ rw_wlock(&pvh_global_lock);
moea_clear_bit(m, PTE_REF);
+ rw_wunlock(&pvh_global_lock);
}
void
@@ -1316,7 +1343,9 @@
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
+ rw_wlock(&pvh_global_lock);
moea_clear_bit(m, PTE_CHG);
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -1342,7 +1371,7 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
lo = moea_attr_fetch(m);
powerpc_sync();
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
@@ -1368,7 +1397,7 @@
vm_page_dirty(m);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -1383,13 +1412,17 @@
* should be tested and standardized at some point in the future for
* optimal aging of shared pages.
*/
-boolean_t
+int
moea_ts_referenced(mmu_t mmu, vm_page_t m)
{
+ int count;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea_ts_referenced: page %p is not managed", m));
- return (moea_clear_bit(m, PTE_REF));
+ rw_wlock(&pvh_global_lock);
+ count = moea_clear_bit(m, PTE_REF);
+ rw_wunlock(&pvh_global_lock);
+ return (count);
}
/*
@@ -1409,7 +1442,7 @@
return;
}
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
pvo_head = vm_page_to_pvoh(m);
lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
@@ -1429,14 +1462,14 @@
PMAP_UNLOCK(pmap);
}
m->md.mdpg_cache_attrs = ma;
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
* Map a wired page into kernel virtual address space.
*/
void
-moea_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
+moea_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
{
moea_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
@@ -1471,7 +1504,7 @@
* Extract the physical page address associated with the given kernel virtual
* address.
*/
-vm_offset_t
+vm_paddr_t
moea_kextract(mmu_t mmu, vm_offset_t va)
{
struct pvo_entry *pvo;
@@ -1512,8 +1545,8 @@
* first usable address after the mapped region.
*/
vm_offset_t
-moea_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
- vm_offset_t pa_end, int prot)
+moea_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
+ vm_paddr_t pa_end, int prot)
{
vm_offset_t sva, va;
@@ -1543,7 +1576,7 @@
("moea_page_exists_quick: page %p is not managed", m));
loops = 0;
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
if (pvo->pvo_pmap == pmap) {
rv = TRUE;
@@ -1552,7 +1585,7 @@
if (++loops >= 16)
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -1569,11 +1602,11 @@
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
count++;
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (count);
}
@@ -1587,7 +1620,7 @@
KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap"));
PMAP_LOCK_INIT(pmap);
- LIST_INIT(&pmap->pmap_pvo);
+ RB_INIT(&pmap->pmap_pvo);
entropy = 0;
__asm __volatile("mftb %0" : "=r"(entropy));
@@ -1661,9 +1694,8 @@
moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
vm_prot_t prot)
{
- struct pvo_entry *pvo;
+ struct pvo_entry *pvo, *tpvo, key;
struct pte *pt;
- int pteidx;
KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
("moea_protect: non current pmap"));
@@ -1673,13 +1705,12 @@
return;
}
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pm);
- for (; sva < eva; sva += PAGE_SIZE) {
- pvo = moea_pvo_find_va(pm, sva, &pteidx);
- if (pvo == NULL)
- continue;
-
+ key.pvo_vaddr = sva;
+ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
+ pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
if ((prot & VM_PROT_EXECUTE) == 0)
pvo->pvo_vaddr &= ~PVO_EXECUTABLE;
@@ -1687,7 +1718,7 @@
* Grab the PTE pointer before we diddle with the cached PTE
* copy.
*/
- pt = moea_pvo_to_pte(pvo, pteidx);
+ pt = moea_pvo_to_pte(pvo, -1);
/*
* Change the protection of the page.
*/
@@ -1702,7 +1733,7 @@
mtx_unlock(&moea_table_mutex);
}
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1766,26 +1797,18 @@
void
moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
- struct pvo_entry *pvo, *tpvo;
- int pteidx;
+ struct pvo_entry *pvo, *tpvo, key;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pm);
- if ((eva - sva)/PAGE_SIZE < 10) {
- for (; sva < eva; sva += PAGE_SIZE) {
- pvo = moea_pvo_find_va(pm, sva, &pteidx);
- if (pvo != NULL)
- moea_pvo_remove(pvo, pteidx);
- }
- } else {
- LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
- if (PVO_VADDR(pvo) < sva || PVO_VADDR(pvo) >= eva)
- continue;
- moea_pvo_remove(pvo, -1);
- }
+ key.pvo_vaddr = sva;
+ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
+ pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+ moea_pvo_remove(pvo, -1);
}
PMAP_UNLOCK(pm);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -1799,7 +1822,7 @@
struct pvo_entry *pvo, *next_pvo;
pmap_t pmap;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
pvo_head = vm_page_to_pvoh(m);
for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
next_pvo = LIST_NEXT(pvo, pvo_vlink);
@@ -1809,12 +1832,12 @@
moea_pvo_remove(pvo, -1);
PMAP_UNLOCK(pmap);
}
- if ((m->aflags & PGA_WRITEABLE) && moea_is_modified(mmu, m)) {
+ if ((m->aflags & PGA_WRITEABLE) && moea_query_bit(m, PTE_CHG)) {
moea_attr_clear(m, PTE_CHG);
vm_page_dirty(m);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -1946,7 +1969,7 @@
/*
* Add to pmap list
*/
- LIST_INSERT_HEAD(&pm->pmap_pvo, pvo, pvo_plink);
+ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo);
/*
* Remember if the list was empty and therefore will be the first
@@ -2017,7 +2040,7 @@
* Remove this PVO from the PV and pmap lists.
*/
LIST_REMOVE(pvo, pvo_vlink);
- LIST_REMOVE(pvo, pvo_plink);
+ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
/*
* Remove this from the overflow list and return it to the pool
@@ -2286,10 +2309,10 @@
struct pvo_entry *pvo;
struct pte *pt;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
if (moea_attr_fetch(m) & ptebit)
return (TRUE);
- vm_page_lock_queues();
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
/*
@@ -2298,7 +2321,6 @@
*/
if (pvo->pvo_pte.pte.pte_lo & ptebit) {
moea_attr_save(m, ptebit);
- vm_page_unlock_queues();
return (TRUE);
}
}
@@ -2322,13 +2344,11 @@
mtx_unlock(&moea_table_mutex);
if (pvo->pvo_pte.pte.pte_lo & ptebit) {
moea_attr_save(m, ptebit);
- vm_page_unlock_queues();
return (TRUE);
}
}
}
- vm_page_unlock_queues();
return (FALSE);
}
@@ -2339,7 +2359,7 @@
struct pvo_entry *pvo;
struct pte *pt;
- vm_page_lock_queues();
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
/*
* Clear the cached value.
@@ -2373,7 +2393,6 @@
pvo->pvo_pte.pte.pte_lo &= ~ptebit;
}
- vm_page_unlock_queues();
return (count);
}
@@ -2418,7 +2437,7 @@
}
boolean_t
-moea_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+moea_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
int i;
@@ -2441,7 +2460,7 @@
* NOT real memory.
*/
void *
-moea_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+moea_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
return (moea_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT));
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/mmu_oea64.c
--- a/head/sys/powerpc/aim/mmu_oea64.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/mmu_oea64.c Wed Jul 25 16:40:53 2012 +0300
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea64.c 234156 2012-04-11 22:23:50Z nwhitehorn $");
+__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea64.c 238357 2012-07-10 22:10:21Z alc $");
/*
* Manages physical address maps.
@@ -223,8 +223,6 @@
* PVO data.
*/
struct pvo_head *moea64_pvo_table; /* pvo entries by pteg index */
-struct pvo_head moea64_pvo_kunmanaged = /* list of unmanaged pages */
- LIST_HEAD_INITIALIZER(moea64_pvo_kunmanaged);
uma_zone_t moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */
uma_zone_t moea64_mpvo_zone; /* zone for pvo entries for managed pages */
@@ -307,8 +305,8 @@
boolean_t moea64_is_modified(mmu_t, vm_page_t);
boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
boolean_t moea64_is_referenced(mmu_t, vm_page_t);
-boolean_t moea64_ts_referenced(mmu_t, vm_page_t);
-vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
+int moea64_ts_referenced(mmu_t, vm_page_t);
+vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t);
int moea64_page_wired_mappings(mmu_t, vm_page_t);
void moea64_pinit(mmu_t, pmap_t);
@@ -326,14 +324,14 @@
void moea64_zero_page_idle(mmu_t, vm_page_t);
void moea64_activate(mmu_t, struct thread *);
void moea64_deactivate(mmu_t, struct thread *);
-void *moea64_mapdev(mmu_t, vm_offset_t, vm_size_t);
+void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t);
void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t);
void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t);
-vm_offset_t moea64_kextract(mmu_t, vm_offset_t);
+vm_paddr_t moea64_kextract(mmu_t, vm_offset_t);
void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma);
void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma);
-void moea64_kenter(mmu_t, vm_offset_t, vm_offset_t);
-boolean_t moea64_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
+void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t);
+boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);
static mmu_method_t moea64_methods[] = {
@@ -622,8 +620,8 @@
pte_lo |= LPTE_G;
moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone,
- &moea64_pvo_kunmanaged, pa, pa,
- pte_lo, PVO_WIRED | PVO_LARGE);
+ NULL, pa, pa, pte_lo,
+ PVO_WIRED | PVO_LARGE);
}
}
PMAP_UNLOCK(kernel_pmap);
@@ -820,7 +818,7 @@
kernel_pmap->pmap_phys = kernel_pmap;
CPU_FILL(&kernel_pmap->pm_active);
- LIST_INIT(&kernel_pmap->pmap_pvo);
+ RB_INIT(&kernel_pmap->pmap_pvo);
PMAP_LOCK_INIT(kernel_pmap);
@@ -1179,7 +1177,7 @@
int error;
if (!moea64_initialized) {
- pvo_head = &moea64_pvo_kunmanaged;
+ pvo_head = NULL;
pg = NULL;
zone = moea64_upvo_zone;
pvo_flags = 0;
@@ -1197,7 +1195,7 @@
/* XXX change the pvo head for fake pages */
if ((m->oflags & VPO_UNMANAGED) != 0) {
pvo_flags &= ~PVO_MANAGED;
- pvo_head = &moea64_pvo_kunmanaged;
+ pvo_head = NULL;
zone = moea64_upvo_zone;
}
@@ -1315,7 +1313,6 @@
struct pvo_entry *pvo;
vm_paddr_t pa;
- LOCK_TABLE_RD();
PMAP_LOCK(pm);
pvo = moea64_pvo_find_va(pm, va);
if (pvo == NULL)
@@ -1323,7 +1320,6 @@
else
pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) |
(va - PVO_VADDR(pvo));
- UNLOCK_TABLE_RD();
PMAP_UNLOCK(pm);
return (pa);
}
@@ -1342,7 +1338,6 @@
m = NULL;
pa = 0;
- LOCK_TABLE_RD();
PMAP_LOCK(pmap);
retry:
pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
@@ -1356,7 +1351,6 @@
vm_page_hold(m);
}
PA_UNLOCK_COND(pa);
- UNLOCK_TABLE_RD();
PMAP_UNLOCK(pmap);
return (m);
}
@@ -1404,8 +1398,7 @@
PMAP_LOCK(kernel_pmap);
moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone,
- &moea64_pvo_kunmanaged, va, VM_PAGE_TO_PHYS(m), LPTE_M,
- PVO_WIRED | PVO_BOOTSTRAP);
+ NULL, va, VM_PAGE_TO_PHYS(m), LPTE_M, PVO_WIRED | PVO_BOOTSTRAP);
if (needed_lock)
PMAP_UNLOCK(kernel_pmap);
@@ -1479,12 +1472,10 @@
struct pvo_entry *pvo;
boolean_t rv;
- LOCK_TABLE_RD();
PMAP_LOCK(pmap);
pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0;
PMAP_UNLOCK(pmap);
- UNLOCK_TABLE_RD();
return (rv);
}
@@ -1579,7 +1570,7 @@
* should be tested and standardized at some point in the future for
* optimal aging of shared pages.
*/
-boolean_t
+int
moea64_ts_referenced(mmu_t mmu, vm_page_t m)
{
@@ -1640,7 +1631,7 @@
LOCK_TABLE_WR();
PMAP_LOCK(kernel_pmap);
error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone,
- &moea64_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED);
+ NULL, va, pa, pte_lo, PVO_WIRED);
PMAP_UNLOCK(kernel_pmap);
UNLOCK_TABLE_WR();
@@ -1650,7 +1641,7 @@
}
void
-moea64_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
+moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
{
moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
@@ -1660,7 +1651,7 @@
* Extract the physical page address associated with the given kernel virtual
* address.
*/
-vm_offset_t
+vm_paddr_t
moea64_kextract(mmu_t mmu, vm_offset_t va)
{
struct pvo_entry *pvo;
@@ -1673,13 +1664,11 @@
if (va < VM_MIN_KERNEL_ADDRESS)
return (va);
- LOCK_TABLE_RD();
PMAP_LOCK(kernel_pmap);
pvo = moea64_pvo_find_va(kernel_pmap, va);
KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
va));
pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo));
- UNLOCK_TABLE_RD();
PMAP_UNLOCK(kernel_pmap);
return (pa);
}
@@ -1703,8 +1692,8 @@
* first usable address after the mapped region.
*/
vm_offset_t
-moea64_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
- vm_offset_t pa_end, int prot)
+moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
+ vm_paddr_t pa_end, int prot)
{
vm_offset_t sva, va;
@@ -1826,7 +1815,7 @@
moea64_pinit(mmu_t mmu, pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
- LIST_INIT(&pmap->pmap_pvo);
+ RB_INIT(&pmap->pmap_pvo);
pmap->pm_slb_tree_root = slb_alloc_tree();
pmap->pm_slb = slb_alloc_user_cache();
@@ -1840,7 +1829,7 @@
uint32_t hash;
PMAP_LOCK_INIT(pmap);
- LIST_INIT(&pmap->pmap_pvo);
+ RB_INIT(&pmap->pmap_pvo);
if (pmap_bootstrapped)
pmap->pmap_phys = (pmap_t)moea64_kextract(mmu,
@@ -1936,7 +1925,7 @@
moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
vm_prot_t prot)
{
- struct pvo_entry *pvo, *tpvo;
+ struct pvo_entry *pvo, *tpvo, key;
CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
sva, eva, prot);
@@ -1951,26 +1940,11 @@
LOCK_TABLE_RD();
PMAP_LOCK(pm);
- if ((eva - sva)/PAGE_SIZE < pm->pm_stats.resident_count) {
- while (sva < eva) {
- #ifdef __powerpc64__
- if (pm != kernel_pmap &&
- user_va_to_slb_entry(pm, sva) == NULL) {
- sva = roundup2(sva + 1, SEGMENT_LENGTH);
- continue;
- }
- #endif
- pvo = moea64_pvo_find_va(pm, sva);
- if (pvo != NULL)
- moea64_pvo_protect(mmu, pm, pvo, prot);
- sva += PAGE_SIZE;
- }
- } else {
- LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
- if (PVO_VADDR(pvo) < sva || PVO_VADDR(pvo) >= eva)
- continue;
- moea64_pvo_protect(mmu, pm, pvo, prot);
- }
+ key.pvo_vaddr = sva;
+ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
+ pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+ moea64_pvo_protect(mmu, pm, pvo, prot);
}
UNLOCK_TABLE_RD();
PMAP_UNLOCK(pm);
@@ -2049,7 +2023,7 @@
LOCK_TABLE_WR();
PMAP_LOCK(pm);
- LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
+ RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
if (!(pvo->pvo_vaddr & PVO_WIRED))
moea64_pvo_remove(mmu, pvo);
}
@@ -2063,7 +2037,7 @@
void
moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
- struct pvo_entry *pvo, *tpvo;
+ struct pvo_entry *pvo, *tpvo, key;
/*
* Perform an unsynchronized read. This is, however, safe.
@@ -2073,26 +2047,11 @@
LOCK_TABLE_WR();
PMAP_LOCK(pm);
- if ((eva - sva)/PAGE_SIZE < pm->pm_stats.resident_count) {
- while (sva < eva) {
- #ifdef __powerpc64__
- if (pm != kernel_pmap &&
- user_va_to_slb_entry(pm, sva) == NULL) {
- sva = roundup2(sva + 1, SEGMENT_LENGTH);
- continue;
- }
- #endif
- pvo = moea64_pvo_find_va(pm, sva);
- if (pvo != NULL)
- moea64_pvo_remove(mmu, pvo);
- sva += PAGE_SIZE;
- }
- } else {
- LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
- if (PVO_VADDR(pvo) < sva || PVO_VADDR(pvo) >= eva)
- continue;
- moea64_pvo_remove(mmu, pvo);
- }
+ key.pvo_vaddr = sva;
+ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
+ pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+ moea64_pvo_remove(mmu, pvo);
}
UNLOCK_TABLE_WR();
PMAP_UNLOCK(pm);
@@ -2244,12 +2203,6 @@
moea64_bpvo_pool_index++;
bootstrap = 1;
} else {
- /*
- * Note: drop the table lock around the UMA allocation in
- * case the UMA allocator needs to manipulate the page
- * table. The mapping we are working with is already
- * protected by the PMAP lock.
- */
pvo = uma_zalloc(zone, M_NOWAIT);
}
@@ -2266,7 +2219,7 @@
if (flags & PVO_WIRED)
pvo->pvo_vaddr |= PVO_WIRED;
- if (pvo_head != &moea64_pvo_kunmanaged)
+ if (pvo_head != NULL)
pvo->pvo_vaddr |= PVO_MANAGED;
if (bootstrap)
pvo->pvo_vaddr |= PVO_BOOTSTRAP;
@@ -2279,15 +2232,17 @@
/*
* Add to pmap list
*/
- LIST_INSERT_HEAD(&pm->pmap_pvo, pvo, pvo_plink);
+ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo);
/*
* Remember if the list was empty and therefore will be the first
* item.
*/
- if (LIST_FIRST(pvo_head) == NULL)
- first = 1;
- LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
+ if (pvo_head != NULL) {
+ if (LIST_FIRST(pvo_head) == NULL)
+ first = 1;
+ LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
+ }
if (pvo->pvo_vaddr & PVO_WIRED) {
pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED;
@@ -2350,10 +2305,9 @@
pvo->pvo_pmap->pm_stats.wired_count--;
/*
- * Remove this PVO from the PV and pmap lists.
+ * Remove this PVO from the pmap list.
*/
- LIST_REMOVE(pvo, pvo_vlink);
- LIST_REMOVE(pvo, pvo_plink);
+ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
/*
* Remove this from the overflow list and return it to the pool
@@ -2367,6 +2321,7 @@
pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED && pg != NULL) {
+ LIST_REMOVE(pvo, pvo_vlink);
if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) {
if (pvo->pvo_pte.lpte.pte_lo & LPTE_CHG)
vm_page_dirty(pg);
@@ -2390,41 +2345,10 @@
static struct pvo_entry *
moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
{
- struct pvo_entry *pvo;
- int ptegidx;
- uint64_t vsid;
- #ifdef __powerpc64__
- uint64_t slbv;
+ struct pvo_entry key;
- if (pm == kernel_pmap) {
- slbv = kernel_va_to_slbv(va);
- } else {
- struct slb *slb;
- slb = user_va_to_slb_entry(pm, va);
- /* The page is not mapped if the segment isn't */
- if (slb == NULL)
- return NULL;
- slbv = slb->slbv;
- }
-
- vsid = (slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
- if (slbv & SLBV_L)
- va &= ~moea64_large_page_mask;
- else
- va &= ~ADDR_POFF;
- ptegidx = va_to_pteg(vsid, va, slbv & SLBV_L);
- #else
- va &= ~ADDR_POFF;
- vsid = va_to_vsid(pm, va);
- ptegidx = va_to_pteg(vsid, va, 0);
- #endif
-
- LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
- if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va)
- break;
- }
-
- return (pvo);
+ key.pvo_vaddr = va & ~ADDR_POFF;
+ return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
}
static boolean_t
@@ -2516,23 +2440,23 @@
}
boolean_t
-moea64_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
- struct pvo_entry *pvo;
+ struct pvo_entry *pvo, key;
vm_offset_t ppa;
int error = 0;
- LOCK_TABLE_RD();
PMAP_LOCK(kernel_pmap);
- for (ppa = pa & ~ADDR_POFF; ppa < pa + size; ppa += PAGE_SIZE) {
- pvo = moea64_pvo_find_va(kernel_pmap, ppa);
+ key.pvo_vaddr = ppa = pa & ~ADDR_POFF;
+ for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
+ ppa < pa + size; ppa += PAGE_SIZE,
+ pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
if (pvo == NULL ||
(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) {
error = EFAULT;
break;
}
}
- UNLOCK_TABLE_RD();
PMAP_UNLOCK(kernel_pmap);
return (error);
@@ -2569,7 +2493,7 @@
}
void *
-moea64_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT);
@@ -2595,7 +2519,6 @@
vm_paddr_t pa;
vm_size_t len;
- LOCK_TABLE_RD();
PMAP_LOCK(pm);
while (sz > 0) {
lim = round_page(va);
@@ -2609,6 +2532,5 @@
va += len;
sz -= len;
}
- UNLOCK_TABLE_RD();
PMAP_UNLOCK(pm);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/moea64_native.c
--- a/head/sys/powerpc/aim/moea64_native.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/moea64_native.c Wed Jul 25 16:40:53 2012 +0300
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/aim/moea64_native.c 233964 2012-04-06 22:33:13Z nwhitehorn $");
+__FBSDID("$FreeBSD: head/sys/powerpc/aim/moea64_native.c 234760 2012-04-28 14:42:49Z nwhitehorn $");
/*
* Native 64-bit page table operations for running without a hypervisor.
@@ -133,36 +133,31 @@
#define VSID_HASH_MASK 0x0000007fffffffffULL
-/*
- * The tlbie instruction must be executed in 64-bit mode
- * so we have to twiddle MSR[SF] around every invocation.
- * Just to add to the fun, exceptions must be off as well
- * so that we can't trap in 64-bit mode. What a pain.
- */
-static struct mtx tlbie_mutex;
-
static __inline void
TLBIE(uint64_t vpn) {
#ifndef __powerpc64__
register_t vpn_hi, vpn_lo;
register_t msr;
- register_t scratch;
+ register_t scratch, intr;
#endif
+ static volatile u_int tlbie_lock = 0;
+
vpn <<= ADDR_PIDX_SHFT;
vpn &= ~(0xffffULL << 48);
+ /* Hobo spinlock: we need stronger guarantees than mutexes provide */
+ while (!atomic_cmpset_int(&tlbie_lock, 0, 1));
+ isync(); /* Flush instruction queue once lock acquired */
+
#ifdef __powerpc64__
- mtx_lock(&tlbie_mutex);
__asm __volatile("tlbie %0" :: "r"(vpn) : "memory");
- mtx_unlock(&tlbie_mutex);
- __asm __volatile("eieio; tlbsync; ptesync");
+ __asm __volatile("eieio; tlbsync; ptesync" ::: "memory");
#else
vpn_hi = (uint32_t)(vpn >> 32);
vpn_lo = (uint32_t)vpn;
- /* Note: spin mutex is to disable exceptions while fiddling MSR */
- mtx_lock_spin(&tlbie_mutex);
+ intr = intr_disable();
__asm __volatile("\
mfmsr %0; \
mr %1, %0; \
@@ -179,8 +174,11 @@
ptesync;"
: "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1)
: "memory");
- mtx_unlock_spin(&tlbie_mutex);
+ intr_restore(intr);
#endif
+
+ /* No barriers or special ops -- taken care of by ptesync above */
+ tlbie_lock = 0;
}
#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR)
@@ -261,9 +259,9 @@
* As shown in Section 7.6.3.2.3
*/
pt->pte_lo &= ~ptebit;
- sched_pin();
+ critical_enter();
TLBIE(vpn);
- sched_unpin();
+ critical_exit();
}
static void
@@ -297,12 +295,12 @@
* Invalidate the pte.
*/
isync();
- sched_pin();
+ critical_enter();
pvo_pt->pte_hi &= ~LPTE_VALID;
pt->pte_hi &= ~LPTE_VALID;
PTESYNC();
TLBIE(vpn);
- sched_unpin();
+ critical_exit();
/*
* Save the reg & chg bits.
@@ -405,15 +403,6 @@
CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table);
- /*
- * Initialize the TLBIE lock. TLBIE can only be executed by one CPU.
- */
-#ifdef __powerpc64__
- mtx_init(&tlbie_mutex, "tlbie", NULL, MTX_DEF);
-#else
- mtx_init(&tlbie_mutex, "tlbie", NULL, MTX_SPIN);
-#endif
-
moea64_mid_bootstrap(mmup, kernelstart, kernelend);
/*
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/slb.c
--- a/head/sys/powerpc/aim/slb.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/slb.c Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/aim/slb.c 230123 2012-01-15 00:08:14Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/aim/slb.c 234745 2012-04-28 00:12:23Z nwhitehorn $
*/
#include <sys/param.h>
@@ -139,7 +139,7 @@
* that a lockless searcher always sees a valid path through
* the tree.
*/
- powerpc_sync();
+ mb();
idx = esid2idx(esid, parent->ua_level);
parent->u.ua_child[idx] = child;
@@ -187,7 +187,7 @@
idx = esid2idx(child->ua_base, inter->ua_level);
inter->u.ua_child[idx] = child;
setbit(&inter->ua_alloc, idx);
- powerpc_sync();
+ mb();
/* Set up parent to point to intermediate node ... */
idx = esid2idx(inter->ua_base, parent->ua_level);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/swtch32.S
--- a/head/sys/powerpc/aim/swtch32.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/swtch32.S Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/powerpc/aim/swtch32.S 234517 2012-04-20 23:01:36Z nwhitehorn $ */
+/* $FreeBSD: head/sys/powerpc/aim/swtch32.S 235013 2012-05-04 16:00:22Z nwhitehorn $ */
/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
/*-
@@ -124,7 +124,8 @@
blocked_loop:
lwz %r7,TD_LOCK(%r2)
cmpw %r6,%r7
- beq blocked_loop
+ beq- blocked_loop
+ isync
#endif
mfsprg %r7,0 /* Get the pcpu pointer */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/aim/swtch64.S
--- a/head/sys/powerpc/aim/swtch64.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/aim/swtch64.S Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/powerpc/aim/swtch64.S 234517 2012-04-20 23:01:36Z nwhitehorn $ */
+/* $FreeBSD: head/sys/powerpc/aim/swtch64.S 235013 2012-05-04 16:00:22Z nwhitehorn $ */
/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
/*-
@@ -150,7 +150,8 @@
blocked_loop:
ld %r7,TD_LOCK(%r13)
cmpd %r6,%r7
- beq blocked_loop
+ beq- blocked_loop
+ isync
#endif
mfsprg %r7,0 /* Get the pcpu pointer */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/locore.S
--- a/head/sys/powerpc/booke/locore.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/locore.S Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/booke/locore.S 224617 2011-08-02 23:33:44Z marcel $
+ * $FreeBSD: head/sys/powerpc/booke/locore.S 236141 2012-05-27 10:25:20Z raj $
*/
#include "assym.s"
@@ -218,7 +218,7 @@
mr %r3, %r30
mr %r4, %r31
- /* Prepare e500 core */
+ /* Prepare core */
bl booke_init
/* Switch to thread0.td_kstack now */
@@ -242,14 +242,20 @@
__boot_page:
bl 1f
- .globl kernload_ap
-kernload_ap:
+ .globl bp_trace
+bp_trace:
+ .long 0
+
+ .globl bp_kernload
+bp_kernload:
.long 0
/*
* Initial configuration
*/
1:
+ mflr %r31 /* r31 hold the address of bp_trace */
+
/* Set HIDs */
lis %r3, HID0_E500_DEFAULT_SET at h
ori %r3, %r3, HID0_E500_DEFAULT_SET at l
@@ -318,15 +324,15 @@
mtspr SPR_MAS2, %r3
isync
- /* Retrieve kernel load [physical] address from kernload_ap */
+ /* Retrieve kernel load [physical] address from bp_kernload */
bl 4f
4: mflr %r3
rlwinm %r3, %r3, 0, 0, 19
- lis %r4, kernload_ap at h
- ori %r4, %r4, kernload_ap at l
+ lis %r4, bp_kernload at h
+ ori %r4, %r4, bp_kernload at l
lis %r5, __boot_page at h
ori %r5, %r5, __boot_page at l
- sub %r4, %r4, %r5 /* offset of kernload_ap within __boot_page */
+ sub %r4, %r4, %r5 /* offset of bp_kernload within __boot_page */
lwzx %r3, %r4, %r3
/* Set RPN and protection */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/machdep.c
--- a/head/sys/powerpc/booke/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,5 +1,5 @@
/*-
- * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8 at semihalf.com>
+ * Copyright (C) 2006-2012 Semihalf
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/booke/machdep.c 230767 2012-01-30 07:56:00Z kib $");
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/machdep.c 238030 2012-07-02 21:11:01Z marcel $");
#include "opt_compat.h"
#include "opt_ddb.h"
@@ -129,6 +129,7 @@
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/sigframe.h>
+#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/platform.h>
@@ -138,8 +139,6 @@
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
-#include <powerpc/mpc85xx/mpc85xx.h>
-
#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif
@@ -158,11 +157,6 @@
extern unsigned char __sbss_end[];
extern unsigned char _end[];
-extern void dcache_enable(void);
-extern void dcache_inval(void);
-extern void icache_enable(void);
-extern void icache_inval(void);
-
/*
* Bootinfo is passed to us by legacy loaders. Save the address of the
* structure to handle backward compatibility.
@@ -185,8 +179,8 @@
int hw_direct_map = 0;
-static void cpu_e500_startup(void *);
-SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_e500_startup, NULL);
+static void cpu_booke_startup(void *);
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_booke_startup, NULL);
void print_kernel_section_addr(void);
void print_kenv(void);
@@ -195,7 +189,7 @@
extern int elf32_nxstack;
static void
-cpu_e500_startup(void *dummy)
+cpu_booke_startup(void *dummy)
{
int indx, size;
@@ -286,7 +280,6 @@
struct pcpu *pc;
void *kmdp, *mdp;
vm_offset_t dtbp, end;
- uint32_t csr;
kmdp = NULL;
@@ -359,9 +352,9 @@
while (1);
OF_interpret("perform-fixup", 0);
-
- /* Initialize TLB1 handling */
- tlb1_init(fdt_immr_pa);
+
+ /* Set up TLB initially */
+ booke_init_tlb(fdt_immr_pa);
/* Reset Time Base */
mttb(0);
@@ -392,20 +385,20 @@
debugf(" boothowto = 0x%08x\n", boothowto);
debugf(" kernel ccsrbar = 0x%08x\n", CCSRBAR_VA);
debugf(" MSR = 0x%08x\n", mfmsr());
+#if defined(BOOKE_E500)
debugf(" HID0 = 0x%08x\n", mfspr(SPR_HID0));
debugf(" HID1 = 0x%08x\n", mfspr(SPR_HID1));
debugf(" BUCSR = 0x%08x\n", mfspr(SPR_BUCSR));
-
- __asm __volatile("msync; isync");
- csr = ccsr_read4(OCP85XX_L2CTL);
- debugf(" L2CTL = 0x%08x\n", csr);
+#endif
debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
print_kernel_section_addr();
print_kenv();
+#if defined(BOOKE_E500)
//tlb1_print_entries();
//tlb1_print_tlbentries();
+#endif
kdb_init();
@@ -421,8 +414,10 @@
pmap_mmu_install(MMU_TYPE_BOOKE, 0);
pmap_bootstrap((uintptr_t)kernel_text, end);
debugf("MSR = 0x%08x\n", mfmsr());
+#if defined(BOOKE_E500)
//tlb1_print_entries();
//tlb1_print_tlbentries();
+#endif
/* Initialize params/tunables that are derived from memsize. */
init_param2(physmem);
@@ -441,29 +436,8 @@
mtmsr(mfmsr() | PSL_ME);
isync();
- /* Enable D-cache if applicable */
- csr = mfspr(SPR_L1CSR0);
- if ((csr & L1CSR0_DCE) == 0) {
- dcache_inval();
- dcache_enable();
- }
-
- csr = mfspr(SPR_L1CSR0);
- if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR0_DCE) == 0)
- printf("L1 D-cache %sabled\n",
- (csr & L1CSR0_DCE) ? "en" : "dis");
-
- /* Enable L1 I-cache if applicable. */
- csr = mfspr(SPR_L1CSR1);
- if ((csr & L1CSR1_ICE) == 0) {
- icache_inval();
- icache_enable();
- }
-
- csr = mfspr(SPR_L1CSR1);
- if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR1_ICE) == 0)
- printf("L1 I-cache %sabled\n",
- (csr & L1CSR1_ICE) ? "en" : "dis");
+ /* Enable L1 caches */
+ booke_enable_l1_cache();
debugf("%s: SP = 0x%08x\n", __func__,
((uintptr_t)thread0.td_pcb - 16) & ~15);
@@ -499,7 +473,24 @@
void
cpu_flush_dcache(void *ptr, size_t len)
{
- /* TBD */
+ register_t addr, off;
+
+ /*
+ * Align the address to a cacheline and adjust the length
+ * accordingly. Then round the length to a multiple of the
+ * cacheline for easy looping.
+ */
+ addr = (uintptr_t)ptr;
+ off = addr & (cacheline_size - 1);
+ addr -= off;
+ len = (len + off + cacheline_size - 1) & ~(cacheline_size - 1);
+
+ while (len > 0) {
+ __asm __volatile ("dcbf 0,%0" :: "r"(addr));
+ __asm __volatile ("sync");
+ addr += cacheline_size;
+ len -= cacheline_size;
+ }
}
void
@@ -538,7 +529,8 @@
{
mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE));
- while (1);
+ while (1)
+ ;
}
int
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/machdep_e500.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/booke/machdep_e500.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,158 @@
+/*-
+ * Copyright (c) 2011-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/machdep_e500.c 236324 2012-05-30 17:34:40Z raj $");
+
+#include <sys/types.h>
+#include <sys/reboot.h>
+
+#include <machine/machdep.h>
+
+#include <dev/fdt/fdt_common.h>
+
+#include <powerpc/mpc85xx/mpc85xx.h>
+
+extern void dcache_enable(void);
+extern void dcache_inval(void);
+extern void icache_enable(void);
+extern void icache_inval(void);
+extern void l2cache_enable(void);
+extern void l2cache_inval(void);
+
+void
+booke_init_tlb(vm_paddr_t fdt_immr_pa)
+{
+
+ /* Initialize TLB1 handling */
+ tlb1_init(fdt_immr_pa);
+}
+
+void
+booke_enable_l1_cache(void)
+{
+ uint32_t csr;
+
+ /* Enable D-cache if applicable */
+ csr = mfspr(SPR_L1CSR0);
+ if ((csr & L1CSR0_DCE) == 0) {
+ dcache_inval();
+ dcache_enable();
+ }
+
+ csr = mfspr(SPR_L1CSR0);
+ if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR0_DCE) == 0)
+ printf("L1 D-cache %sabled\n",
+ (csr & L1CSR0_DCE) ? "en" : "dis");
+
+ /* Enable L1 I-cache if applicable. */
+ csr = mfspr(SPR_L1CSR1);
+ if ((csr & L1CSR1_ICE) == 0) {
+ icache_inval();
+ icache_enable();
+ }
+
+ csr = mfspr(SPR_L1CSR1);
+ if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR1_ICE) == 0)
+ printf("L1 I-cache %sabled\n",
+ (csr & L1CSR1_ICE) ? "en" : "dis");
+}
+
+#if 0
+void
+booke_enable_l2_cache(void)
+{
+ uint32_t csr;
+
+ /* Enable L2 cache on E500mc */
+ if ((((mfpvr() >> 16) & 0xFFFF) == FSL_E500mc) ||
+ (((mfpvr() >> 16) & 0xFFFF) == FSL_E5500)) {
+ csr = mfspr(SPR_L2CSR0);
+ if ((csr & L2CSR0_L2E) == 0) {
+ l2cache_inval();
+ l2cache_enable();
+ }
+
+ csr = mfspr(SPR_L2CSR0);
+ if ((boothowto & RB_VERBOSE) != 0 || (csr & L2CSR0_L2E) == 0)
+ printf("L2 cache %sabled\n",
+ (csr & L2CSR0_L2E) ? "en" : "dis");
+ }
+}
+
+void
+booke_enable_l3_cache(void)
+{
+ uint32_t csr, size, ver;
+
+ /* Enable L3 CoreNet Platform Cache (CPC) */
+ ver = SVR_VER(mfspr(SPR_SVR));
+ if (ver == SVR_P2041 || ver == SVR_P2041E || ver == SVR_P3041 ||
+ ver == SVR_P3041E || ver == SVR_P5020 || ver == SVR_P5020E) {
+ csr = ccsr_read4(OCP85XX_CPC_CSR0);
+ if ((csr & OCP85XX_CPC_CSR0_CE) == 0) {
+ l3cache_inval();
+ l3cache_enable();
+ }
+
+ csr = ccsr_read4(OCP85XX_CPC_CSR0);
+ if ((boothowto & RB_VERBOSE) != 0 ||
+ (csr & OCP85XX_CPC_CSR0_CE) == 0) {
+ size = OCP85XX_CPC_CFG0_SZ_K(ccsr_read4(OCP85XX_CPC_CFG0));
+ printf("L3 Corenet Platform Cache: %d KB %sabled\n",
+ size, (csr & OCP85XX_CPC_CSR0_CE) == 0 ?
+ "dis" : "en");
+ }
+ }
+}
+
+void
+booke_disable_l2_cache(void)
+{
+}
+
+static void
+l3cache_inval(void)
+{
+
+ /* Flash invalidate the CPC and clear all the locks */
+ ccsr_write4(OCP85XX_CPC_CSR0, OCP85XX_CPC_CSR0_FI |
+ OCP85XX_CPC_CSR0_LFC);
+ while (ccsr_read4(OCP85XX_CPC_CSR0) & (OCP85XX_CPC_CSR0_FI |
+ OCP85XX_CPC_CSR0_LFC))
+ ;
+}
+
+static void
+l3cache_enable(void)
+{
+
+ ccsr_write4(OCP85XX_CPC_CSR0, OCP85XX_CPC_CSR0_CE |
+ OCP85XX_CPC_CSR0_PE);
+ /* Read back to sync write */
+ ccsr_read4(OCP85XX_CPC_CSR0);
+}
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/machdep_ppc4xx.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/booke/machdep_ppc4xx.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2011-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/machdep_ppc4xx.c 236324 2012-05-30 17:34:40Z raj $");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/machdep.h>
+
+#include <powerpc/booke/dcr.h>
+#include <powerpc/apm86xxx/apm86xxx.h>
+
+#include <dev/fdt/fdt_common.h>
+
+#define OCP_ADDR_WORDLO(addr) ((uint32_t)((uint64_t)(addr) & 0xFFFFFFFF))
+#define OCP_ADDR_WORDHI(addr) ((uint32_t)((uint64_t)(addr) >> 32))
+
+extern void tlb_write(u_int, uint32_t, uint32_t, uint32_t, tlbtid_t, uint32_t,
+ uint32_t);
+extern void tlb_read(u_int, uint32_t *, uint32_t *, uint32_t *, uint32_t *,
+ uint32_t *, uint32_t *);
+
+unsigned int tlb_static_entries;
+unsigned int tlb_current_entry = TLB_SIZE;
+unsigned int tlb_misses = 0;
+unsigned int tlb_invals = 0;
+
+void tlb_map(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
+void tlb_map_mem(uint32_t, uint32_t, uint32_t);
+void tlb_dump(void);
+
+void
+booke_init_tlb(vm_paddr_t fdt_immr_pa)
+{
+
+ /* Map register space */
+ tlb_map(APM86XXX_DEEP_SLEEP_VA,
+ OCP_ADDR_WORDLO(APM86XXX_DEEP_SLEEP_PA),
+ OCP_ADDR_WORDHI(APM86XXX_DEEP_SLEEP_PA), TLB_VALID | TLB_SIZE_16M,
+ TLB_SW | TLB_SR | TLB_I | TLB_G);
+
+ tlb_map(APM86XXX_CSR_VA, OCP_ADDR_WORDLO(APM86XXX_CSR_PA),
+ OCP_ADDR_WORDHI(APM86XXX_CSR_PA), TLB_VALID | TLB_SIZE_16M,
+ TLB_SW | TLB_SR | TLB_I | TLB_G);
+
+ tlb_map(APM86XXX_PRIMARY_FABRIC_VA,
+ OCP_ADDR_WORDLO(APM86XXX_PRIMARY_FABRIC_PA),
+ OCP_ADDR_WORDHI(APM86XXX_PRIMARY_FABRIC_PA),
+ TLB_VALID | TLB_SIZE_16M,
+ TLB_SW | TLB_SR | TLB_I | TLB_G);
+
+ tlb_map(APM86XXX_AHB_VA, OCP_ADDR_WORDLO(APM86XXX_AHB_PA),
+ OCP_ADDR_WORDHI(APM86XXX_AHB_PA),
+ TLB_VALID | TLB_SIZE_16M,
+ TLB_SW | TLB_SR | TLB_I | TLB_G);
+
+ /* Map MailBox space */
+ tlb_map(APM86XXX_MBOX_VA, OCP_ADDR_WORDLO(APM86XXX_MBOX_PA),
+ OCP_ADDR_WORDHI(APM86XXX_MBOX_PA),
+ TLB_VALID | TLB_SIZE_4K,
+ TLB_UX | TLB_UW | TLB_UR |
+ TLB_SX | TLB_SW | TLB_SR |
+ TLB_I | TLB_G);
+
+ tlb_map(APM86XXX_MBOX_VA + 0x1000,
+ OCP_ADDR_WORDLO(APM86XXX_MBOX_PA) + 0x1000,
+ OCP_ADDR_WORDHI(APM86XXX_MBOX_PA),
+ TLB_VALID | TLB_SIZE_4K,
+ TLB_UX | TLB_UW | TLB_UR |
+ TLB_SX | TLB_SW | TLB_SR |
+ TLB_I | TLB_G);
+
+ tlb_map(APM86XXX_MBOX_VA + 0x2000,
+ OCP_ADDR_WORDLO(APM86XXX_MBOX_PA)+ 0x2000,
+ OCP_ADDR_WORDHI(APM86XXX_MBOX_PA),
+ TLB_VALID | TLB_SIZE_4K,
+ TLB_UX | TLB_UW | TLB_UR |
+ TLB_SX | TLB_SW | TLB_SR |
+ TLB_I | TLB_G);
+}
+
+void
+booke_enable_l1_cache(void)
+{
+}
+
+void
+booke_enable_l2_cache(void)
+{
+}
+
+void
+booke_enable_l3_cache(void)
+{
+}
+
+void
+booke_disable_l2_cache(void)
+{
+ uint32_t ccr1,l2cr0;
+
+ /* Disable L2 cache op broadcast */
+ ccr1 = mfspr(SPR_CCR1);
+ ccr1 &= ~CCR1_L2COBE;
+ mtspr(SPR_CCR1, ccr1);
+
+ /* Set L2 array size to 0 i.e. disable L2 cache */
+ mtdcr(DCR_L2DCDCRAI, DCR_L2CR0);
+ l2cr0 = mfdcr(DCR_L2DCDCRDI);
+ l2cr0 &= ~L2CR0_AS;
+ mtdcr(DCR_L2DCDCRDI, l2cr0);
+}
+
+void tlb_map(uint32_t epn, uint32_t rpn, uint32_t erpn, uint32_t flags,
+ uint32_t perms)
+{
+
+ tlb_write(++tlb_static_entries, epn, rpn, erpn, 0, flags, perms);
+}
+
+static void tlb_dump_entry(u_int entry)
+{
+ uint32_t epn, rpn, erpn, tid, flags, perms;
+ const char *size;
+
+ tlb_read(entry, &epn, &rpn, &erpn, &tid, &flags, &perms);
+
+ switch (flags & TLB_SIZE_MASK) {
+ case TLB_SIZE_1K:
+ size = " 1k";
+ break;
+ case TLB_SIZE_4K:
+ size = " 4k";
+ break;
+ case TLB_SIZE_16K:
+ size = " 16k";
+ break;
+ case TLB_SIZE_256K:
+ size = "256k";
+ break;
+ case TLB_SIZE_1M:
+ size = " 1M";
+ break;
+ case TLB_SIZE_16M:
+ size = " 16M";
+ break;
+ case TLB_SIZE_256M:
+ size = "256M";
+ break;
+ case TLB_SIZE_1G:
+ size = " 1G";
+ break;
+ default:
+ size = "????";
+ break;
+ }
+
+
+ printf("TLB[%02u]: 0x%08X => "
+ "0x%01X_%08X %s %c %c %s %s %s %s %s "
+ "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c (%u)\n",
+ entry, epn, erpn, rpn, size,
+ (flags & TLB_TS) ? '1' : '0',
+ (flags & TLB_VALID) ? 'V' : '.',
+ (perms & TLB_WL1) ? "WL1" : "___",
+ (perms & TLB_IL1I) ? "IL1I" : "____",
+ (perms & TLB_IL1D) ? "IL1D" : "____",
+ (perms & TLB_IL2I) ? "IL2I" : "____",
+ (perms & TLB_IL2D) ? "IL2D" : "____",
+ (perms & TLB_U0) ? '1' : '.',
+ (perms & TLB_U1) ? '2' : '.',
+ (perms & TLB_U2) ? '3' : '.',
+ (perms & TLB_U3) ? '4' : '.',
+ (perms & TLB_W) ? 'W' : '.',
+ (perms & TLB_I) ? 'I' : '.',
+ (perms & TLB_M) ? 'M' : '.',
+ (perms & TLB_G) ? 'G' : '.',
+ (perms & TLB_E) ? 'E' : '.',
+ (perms & TLB_UX) ? 'x' : '.',
+ (perms & TLB_UW) ? 'w' : '.',
+ (perms & TLB_UR) ? 'r' : '.',
+ (perms & TLB_SX) ? 'X' : '.',
+ (perms & TLB_SW) ? 'W' : '.',
+ (perms & TLB_SR) ? 'R' : '.',
+ tid);
+}
+
+void tlb_dump(void)
+{
+ int i;
+
+ for (i = 0; i < TLB_SIZE; i++)
+ tlb_dump_entry(i);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/platform_bare.c
--- a/head/sys/powerpc/booke/platform_bare.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/platform_bare.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2008-2009 Semihalf, Rafal Jaworowski
+ * Copyright (c) 2008-2012 Semihalf.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/booke/platform_bare.c 228201 2011-12-02 15:24:39Z jchandra $");
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/platform_bare.c 236325 2012-05-30 18:05:48Z raj $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -56,7 +56,8 @@
#ifdef SMP
extern void *ap_pcpu;
extern uint8_t __boot_page[]; /* Boot page body */
-extern uint32_t kernload_ap; /* Kernel physical load address */
+extern uint32_t bp_kernload; /* Kernel physical load address */
+extern uint32_t bp_trace; /* AP boot trace field */
#endif
extern uint32_t *bootinfo;
@@ -72,10 +73,10 @@
static int bare_smp_get_bsp(platform_t, struct cpuref *cpuref);
static int bare_smp_start_cpu(platform_t, struct pcpu *cpu);
-static void e500_reset(platform_t);
+static void booke_reset(platform_t);
static platform_method_t bare_methods[] = {
- PLATFORMMETHOD(platform_probe, bare_probe),
+ PLATFORMMETHOD(platform_probe, bare_probe),
PLATFORMMETHOD(platform_mem_regions, bare_mem_regions),
PLATFORMMETHOD(platform_timebase_freq, bare_timebase_freq),
@@ -84,7 +85,7 @@
PLATFORMMETHOD(platform_smp_get_bsp, bare_smp_get_bsp),
PLATFORMMETHOD(platform_smp_start_cpu, bare_smp_start_cpu),
- PLATFORMMETHOD(platform_reset, e500_reset),
+ PLATFORMMETHOD(platform_reset, booke_reset),
{ 0, 0 }
};
@@ -100,26 +101,16 @@
static int
bare_probe(platform_t plat)
{
- uint32_t ver, sr;
+ phandle_t cpus, child;
+ uint32_t sr;
int i, law_max, tgt;
- ver = SVR_VER(mfspr(SPR_SVR));
- switch (ver & ~0x0008) { /* Mask Security Enabled bit */
- case SVR_P4080:
- maxcpu = 8;
- break;
- case SVR_P4040:
- maxcpu = 4;
- break;
- case SVR_MPC8572:
- case SVR_P1020:
- case SVR_P2020:
- maxcpu = 2;
- break;
- default:
+ if ((cpus = OF_finddevice("/cpus")) != 0) {
+ for (maxcpu = 0, child = OF_child(cpus); child != 0;
+ child = OF_peer(child), maxcpu++)
+ ;
+ } else
maxcpu = 1;
- break;
- }
/*
* Clear local access windows. Skip DRAM entries, so we don't shoot
@@ -152,9 +143,9 @@
int i, rv;
rv = fdt_get_mem_regions(avail_regions, availsz, &memsize);
-
if (rv != 0)
- return;
+ panic("%s: could not retrieve mem regions from the 'memory' "
+ "node, error: %d", __func__, rv);
for (i = 0; i < *availsz; i++) {
if (avail_regions[i].mr_start < 1048576) {
@@ -262,8 +253,8 @@
eebpcr = ccsr_read4(OCP85XX_EEBPCR);
if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) {
- printf("%s: CPU=%d already out of hold-off state!\n",
- __func__, pc->pc_cpuid);
+ printf("SMP: CPU %d already out of hold-off state!\n",
+ pc->pc_cpuid);
return (ENXIO);
}
@@ -273,12 +264,13 @@
/*
* Set BPTR to the physical address of the boot page
*/
- bptr = ((uint32_t)__boot_page - KERNBASE) + kernload_ap;
+ bptr = ((uint32_t)__boot_page - KERNBASE) + bp_kernload;
ccsr_write4(OCP85XX_BPTR, (bptr >> 12) | 0x80000000);
/*
* Release AP from hold-off state
*/
+ bp_trace = 0;
eebpcr |= (1 << (pc->pc_cpuid + 24));
ccsr_write4(OCP85XX_EEBPCR, eebpcr);
__asm __volatile("isync; msync");
@@ -287,6 +279,16 @@
while (!pc->pc_awake && timeout--)
DELAY(1000); /* wait 1ms */
+ /*
+ * Disable boot page translation so that the 4K page at the default
+ * address (= 0xfffff000) isn't permanently remapped and thus not
+ * usable otherwise.
+ */
+ ccsr_write4(OCP85XX_BPTR, 0);
+
+ if (!pc->pc_awake)
+ printf("SMP: CPU %d didn't wake up (trace code %#x).\n",
+ pc->pc_awake, bp_trace);
return ((pc->pc_awake) ? 0 : EBUSY);
#else
/* No SMP support */
@@ -295,7 +297,7 @@
}
static void
-e500_reset(platform_t plat)
+booke_reset(platform_t plat)
{
/*
@@ -316,6 +318,7 @@
mtspr(SPR_DBCR0, mfspr(SPR_DBCR0) | DBCR0_IDM | DBCR0_RST_SYSTEM);
printf("Reset failed...\n");
- while (1);
+ while (1)
+ ;
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/pmap.c
--- a/head/sys/powerpc/booke/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/booke/pmap.c 225841 2011-09-28 15:01:20Z kib $");
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/pmap.c 238357 2012-07-10 22:10:21Z alc $");
#include <sys/types.h>
#include <sys/param.h>
@@ -101,10 +101,6 @@
#define TODO panic("%s: not implemented", __func__);
-#include "opt_sched.h"
-#ifndef SCHED_4BSD
-#error "e500 only works with SCHED_4BSD which uses a global scheduler lock."
-#endif
extern struct mtx sched_lock;
extern int dumpsys_minidump;
@@ -115,7 +111,7 @@
extern uint32_t *bootinfo;
#ifdef SMP
-extern uint32_t kernload_ap;
+extern uint32_t bp_kernload;
#endif
vm_paddr_t kernload;
@@ -290,8 +286,8 @@
static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t);
static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t);
-static boolean_t mmu_booke_ts_referenced(mmu_t, vm_page_t);
-static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t,
+static int mmu_booke_ts_referenced(mmu_t, vm_page_t);
+static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t,
int);
static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t,
vm_paddr_t *);
@@ -316,12 +312,12 @@
static void mmu_booke_activate(mmu_t, struct thread *);
static void mmu_booke_deactivate(mmu_t, struct thread *);
static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
-static void *mmu_booke_mapdev(mmu_t, vm_offset_t, vm_size_t);
+static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t);
static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t);
-static vm_offset_t mmu_booke_kextract(mmu_t, vm_offset_t);
-static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_offset_t);
+static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t);
+static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t);
static void mmu_booke_kremove(mmu_t, vm_offset_t);
-static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
+static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t,
vm_size_t);
static vm_offset_t mmu_booke_dumpsys_map(mmu_t, struct pmap_md *,
@@ -967,10 +963,9 @@
debugf("mmu_booke_bootstrap: entered\n");
#ifdef SMP
- kernload_ap = kernload;
+ bp_kernload = kernload;
#endif
-
/* Initialize invalidation mutex */
mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
@@ -981,8 +976,13 @@
* Align kernel start and end address (kernel image).
* Note that kernel end does not necessarily relate to kernsize.
* kernsize is the size of the kernel that is actually mapped.
+ * Also note that "start - 1" is deliberate. With SMP, the
+ * entry point is exactly a page from the actual load address.
+ * As such, trunc_page() has no effect and we're off by a page.
+ * Since we always have the ELF header between the load address
+ * and the entry point, we can safely subtract 1 to compensate.
*/
- kernstart = trunc_page(start);
+ kernstart = trunc_page(start - 1);
data_start = round_page(kernelend);
data_end = data_start;
@@ -1233,9 +1233,9 @@
* entries, but for pte_vatopa() to work correctly with kernel area
* addresses.
*/
- for (va = KERNBASE; va < data_end; va += PAGE_SIZE) {
+ for (va = kernstart; va < data_end; va += PAGE_SIZE) {
pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
- pte->rpn = kernload + (va - KERNBASE);
+ pte->rpn = kernload + (va - kernstart);
pte->flags = PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
PTE_VALID;
}
@@ -1387,7 +1387,7 @@
* Map a wired page into kernel virtual address space.
*/
static void
-mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
+mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
{
unsigned int pdir_idx = PDIR_IDX(va);
unsigned int ptbl_idx = PTBL_IDX(va);
@@ -1397,9 +1397,7 @@
KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
(va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));
- flags = 0;
- flags |= (PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID);
- flags |= PTE_M;
+ flags = PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]);
@@ -1812,8 +1810,8 @@
* Map a range of physical addresses into kernel virtual address space.
*/
static vm_offset_t
-mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
- vm_offset_t pa_end, int prot)
+mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
+ vm_paddr_t pa_end, int prot)
{
vm_offset_t sva = *virt;
vm_offset_t va = sva;
@@ -2439,7 +2437,7 @@
}
static int
-mmu_booke_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
int i;
vm_offset_t va;
@@ -2597,7 +2595,7 @@
* for mapping device memory, NOT real memory.
*/
static void *
-mmu_booke_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
+mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
void *res;
uintptr_t va;
@@ -3044,6 +3042,10 @@
/* Map in CCSRBAR. */
tlb1_set_entry(CCSRBAR_VA, ccsrbar, CCSRBAR_SIZE, _TLB_ENTRY_IO);
+ /* Purge the remaining entries */
+ for (i = tlb1_idx; i < TLB1_ENTRIES; i++)
+ tlb1_write_entry(i);
+
/* Setup TLB miss defaults */
set_mas4_defaults();
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/trap.c
--- a/head/sys/powerpc/booke/trap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/trap.c Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/booke/trap.c 225474 2011-09-11 16:05:09Z kib $");
+__FBSDID("$FreeBSD: head/sys/powerpc/booke/trap.c 238032 2012-07-02 21:18:09Z marcel $");
#include "opt_fpu_emu.h"
@@ -143,6 +143,13 @@
int sig, type, user;
ksiginfo_t ksi;
+#ifdef KDB
+ if (kdb_active) {
+ kdb_reenter();
+ return;
+ }
+#endif
+
PCPU_INC(cnt.v_trap);
td = curthread;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/booke/trap_subr.S
--- a/head/sys/powerpc/booke/trap_subr.S Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/booke/trap_subr.S Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/booke/trap_subr.S 238033 2012-07-02 21:21:12Z marcel $
*/
/*-
* Copyright (C) 1995, 1996 Wolfgang Solfrank.
@@ -668,7 +668,7 @@
lwarx %r21, %r23, %r25 /* get pte->flags */
oris %r21, %r21, PTE_REFERENCED@h /* set referenced bit */
- andi. %r22, %r21, (PTE_UW | PTE_UW)@l /* check if writable */
+ andi. %r22, %r21, (PTE_SW | PTE_UW)@l /* check if writable */
beq 2f
oris %r21, %r21, PTE_MODIFIED@h /* set modified bit */
2:
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/conf/DEFAULTS
--- a/head/sys/powerpc/conf/DEFAULTS Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/conf/DEFAULTS Wed Jul 25 16:40:53 2012 +0300
@@ -1,16 +1,14 @@
#
# DEFAULTS -- Default kernel configuration file for FreeBSD/powerpc
#
-# $FreeBSD: head/sys/powerpc/conf/DEFAULTS 232619 2012-03-06 20:01:25Z attilio $
+# $FreeBSD: head/sys/powerpc/conf/DEFAULTS 238034 2012-07-02 21:25:24Z marcel $
# Pseudo devices.
device mem # Memory and kernel memory devices
# UART chips on this platform
device uart_ns8250
-device uart_z8530
-options GEOM_PART_APM
options GEOM_PART_MBR
options NEW_PCIB
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/conf/GENERIC
--- a/head/sys/powerpc/conf/GENERIC Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/conf/GENERIC Wed Jul 25 16:40:53 2012 +0300
@@ -16,19 +16,19 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/powerpc/conf/GENERIC 233271 2012-03-21 08:38:42Z ed $
+# $FreeBSD: head/sys/powerpc/conf/GENERIC 238034 2012-07-02 21:25:24Z marcel $
cpu AIM
ident GENERIC
-machine powerpc powerpc
+machine powerpc powerpc
makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
# Platform support
options POWERMAC #NewWorld Apple PowerMacs
options PSIM #GDB PSIM ppc simulator
-options MAMBO #IBM Mambo Full System Simulator
+options MAMBO #IBM Mambo Full System Simulator
options SCHED_ULE #ULE scheduler
options PREEMPTION #Enable kernel thread preemption
@@ -49,6 +49,7 @@
options CD9660 #ISO 9660 Filesystem
options PROCFS #Process filesystem (requires PSEUDOFS)
options PSEUDOFS #Pseudo-filesystem framework
+options GEOM_PART_APM #Apple Partition Maps.
options GEOM_PART_GPT #GUID Partition Tables.
options GEOM_LABEL #Provides labelization
options COMPAT_FREEBSD4 #Keep this for a while
@@ -82,7 +83,7 @@
options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed
options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones
-# To make an SMP kernel, the next line is needed
+# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
# CPU frequency control
@@ -107,6 +108,7 @@
device isp # Qlogic family
device ispfw # Firmware module for Qlogic host adapters
device mpt # LSI-Logic MPT-Fusion
+device mps # LSI-Logic MPT-Fusion 2
device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D
# ATA/SCSI peripherals
@@ -126,6 +128,7 @@
# Serial (COM) ports
device scc
device uart
+device uart_z8530
# PCI Ethernet NICs that use the common MII bus controller code.
device miibus # MII bus support
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/conf/GENERIC64
--- a/head/sys/powerpc/conf/GENERIC64 Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/conf/GENERIC64 Wed Jul 25 16:40:53 2012 +0300
@@ -16,19 +16,19 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/powerpc/conf/GENERIC64 233271 2012-03-21 08:38:42Z ed $
+# $FreeBSD: head/sys/powerpc/conf/GENERIC64 238034 2012-07-02 21:25:24Z marcel $
cpu AIM
ident GENERIC
-machine powerpc powerpc64
+machine powerpc powerpc64
makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
# Platform support
options POWERMAC #NewWorld Apple PowerMacs
-options PS3 #Sony Playstation 3
-options MAMBO #IBM Mambo Full System Simulator
+options PS3 #Sony Playstation 3
+options MAMBO #IBM Mambo Full System Simulator
options SCHED_ULE #ULE scheduler
options PREEMPTION #Enable kernel thread preemption
@@ -49,6 +49,7 @@
options CD9660 #ISO 9660 Filesystem
options PROCFS #Process filesystem (requires PSEUDOFS)
options PSEUDOFS #Pseudo-filesystem framework
+options GEOM_PART_APM #Apple Partition Maps.
options GEOM_PART_GPT #GUID Partition Tables.
options GEOM_LABEL #Provides labelization
options COMPAT_FREEBSD32 #Compatible with FreeBSD/powerpc binaries
@@ -67,8 +68,11 @@
options MAC # TrustedBSD MAC Framework
options INCLUDE_CONFIG_FILE # Include this file in kernel
-# Debugging for use in -current
-options KDB #Enable the kernel debugger
+# Debugging support. Always need this:
+options KDB # Enable kernel debugger support.
+# For minimum debugger support (stable branch) use:
+#options KDB_TRACE # Print a stack trace for a panic.
+# For full debugger support use this instead:
options DDB #Support DDB
#options DEADLKRES #Enable the deadlock resolver
options INVARIANTS #Enable calls of extra sanity checking
@@ -102,6 +106,7 @@
device isp # Qlogic family
device ispfw # Firmware module for Qlogic host adapters
device mpt # LSI-Logic MPT-Fusion
+device mps # LSI-Logic MPT-Fusion 2
device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D
# ATA/SCSI peripherals
@@ -121,6 +126,7 @@
# Serial (COM) ports
device scc
device uart
+device uart_z8530
# Ethernet hardware
device glc # Sony Playstation 3 Ethernet
@@ -170,8 +176,8 @@
device kue # Kawasaki LSI USB Ethernet
# Wireless NIC cards
-options IEEE80211_SUPPORT_MESH
-options AH_SUPPORT_AR5416
+options IEEE80211_SUPPORT_MESH
+options AH_SUPPORT_AR5416
# FireWire support
device firewire # FireWire bus code
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/conf/MPC85XX
--- a/head/sys/powerpc/conf/MPC85XX Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/conf/MPC85XX Wed Jul 25 16:40:53 2012 +0300
@@ -1,10 +1,11 @@
#
# Custom kernel for Freescale MPC85XX development boards like the CDS etc.
#
-# $FreeBSD: head/sys/powerpc/conf/MPC85XX 233271 2012-03-21 08:38:42Z ed $
+# $FreeBSD: head/sys/powerpc/conf/MPC85XX 236141 2012-05-27 10:25:20Z raj $
#
-cpu E500
+cpu BOOKE
+cpu BOOKE_E500
ident MPC85XX
machine powerpc powerpc
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/conf/NOTES
--- a/head/sys/powerpc/conf/NOTES Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/conf/NOTES Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-# $FreeBSD: head/sys/powerpc/conf/NOTES 222686 2011-06-04 15:17:35Z andreast $
+# $FreeBSD: head/sys/powerpc/conf/NOTES 236141 2012-05-27 10:25:20Z raj $
#
# This file contains machine dependent kernel configuration notes. For
# machine independent notes, look in /sys/conf/NOTES.
@@ -14,7 +14,8 @@
#
# You must specify at least one CPU (the one you intend to run on).
cpu AIM
-#cpu E500
+#cpu BOOKE_E500
+#cpu BOOKE_PPC440
options FPU_EMU
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/_stdint.h
--- a/head/sys/powerpc/include/_stdint.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/_stdint.h Wed Jul 25 16:40:53 2012 +0300
@@ -34,7 +34,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/include/_stdint.h 229494 2012-01-04 16:02:52Z andreast $
+ * $FreeBSD: head/sys/powerpc/include/_stdint.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__STDINT_H_
@@ -189,12 +189,6 @@
#define SIZE_MAX UINT32_MAX
#endif
-#ifndef WCHAR_MIN /* Also possibly defined in <wchar.h> */
-/* Limits of wchar_t. */
-#define WCHAR_MIN INT32_MIN
-#define WCHAR_MAX INT32_MAX
-#endif
-
/* Limits of wint_t. */
#define WINT_MIN INT32_MIN
#define WINT_MAX INT32_MAX
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/_types.h
--- a/head/sys/powerpc/include/_types.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/_types.h Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
*
* From: @(#)ansi.h 8.2 (Berkeley) 1/4/94
* From: @(#)types.h 8.3 (Berkeley) 1/5/94
- * $FreeBSD: head/sys/powerpc/include/_types.h 230229 2012-01-16 20:17:51Z das $
+ * $FreeBSD: head/sys/powerpc/include/_types.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__TYPES_H_
@@ -133,6 +133,10 @@
#endif
typedef __int64_t __vm_ooffset_t;
typedef __uint64_t __vm_pindex_t;
+typedef int __wchar_t;
+
+#define __WCHAR_MIN __INT_MIN /* min value for a wchar_t */
+#define __WCHAR_MAX __INT_MAX /* max value for a wchar_t */
/*
* Unusual type definitions.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/atomic.h
--- a/head/sys/powerpc/include/atomic.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/atomic.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/include/atomic.h 222198 2011-05-22 20:55:54Z attilio $
+ * $FreeBSD: head/sys/powerpc/include/atomic.h 235946 2012-05-24 23:46:17Z bz $
*/
#ifndef _MACHINE_ATOMIC_H_
@@ -36,12 +36,30 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#define __ATOMIC_BARRIER \
- __asm __volatile("sync" : : : "memory")
+/*
+ * The __ATOMIC_REL/ACQ() macros provide memory barriers only in conjunction
+ * with the atomic lXarx/stXcx. sequences below. They are not exposed outside
+ * of this file. See also Appendix B.2 of Book II of the architecture manual.
+ *
+ * Note that not all Book-E processors accept the light-weight sync variant.
+ * In particular, early models of E500 cores are known to wedge. Bank on all
+ * 64-bit capable CPUs to accept lwsync properly and pressimize 32-bit CPUs
+ * to use the heavier-weight sync.
+ */
-#define mb() __ATOMIC_BARRIER
-#define wmb() mb()
-#define rmb() mb()
+#ifdef __powerpc64__
+#define mb() __asm __volatile("lwsync" : : : "memory")
+#define rmb() __asm __volatile("lwsync" : : : "memory")
+#define wmb() __asm __volatile("lwsync" : : : "memory")
+#define __ATOMIC_REL() __asm __volatile("lwsync" : : : "memory")
+#define __ATOMIC_ACQ() __asm __volatile("isync" : : : "memory")
+#else
+#define mb() __asm __volatile("sync" : : : "memory")
+#define rmb() __asm __volatile("sync" : : : "memory")
+#define wmb() __asm __volatile("sync" : : : "memory")
+#define __ATOMIC_REL() __asm __volatile("sync" : : : "memory")
+#define __ATOMIC_ACQ() __asm __volatile("isync" : : : "memory")
+#endif
/*
* atomic_add(p, v)
@@ -94,13 +112,13 @@
atomic_add_acq_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
__atomic_add_##type(p, v, t); \
- __ATOMIC_BARRIER; \
+ __ATOMIC_ACQ(); \
} \
\
static __inline void \
atomic_add_rel_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
- __ATOMIC_BARRIER; \
+ __ATOMIC_REL(); \
__atomic_add_##type(p, v, t); \
} \
/* _ATOMIC_ADD */
@@ -180,13 +198,13 @@
atomic_clear_acq_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
__atomic_clear_##type(p, v, t); \
- __ATOMIC_BARRIER; \
+ __ATOMIC_ACQ(); \
} \
\
static __inline void \
atomic_clear_rel_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
- __ATOMIC_BARRIER; \
+ __ATOMIC_REL(); \
__atomic_clear_##type(p, v, t); \
} \
/* _ATOMIC_CLEAR */
@@ -282,13 +300,13 @@
atomic_set_acq_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
__atomic_set_##type(p, v, t); \
- __ATOMIC_BARRIER; \
+ __ATOMIC_ACQ(); \
} \
\
static __inline void \
atomic_set_rel_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
- __ATOMIC_BARRIER; \
+ __ATOMIC_REL(); \
__atomic_set_##type(p, v, t); \
} \
/* _ATOMIC_SET */
@@ -368,13 +386,13 @@
atomic_subtract_acq_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
__atomic_subtract_##type(p, v, t); \
- __ATOMIC_BARRIER; \
+ __ATOMIC_ACQ(); \
} \
\
static __inline void \
atomic_subtract_rel_##type(volatile u_##type *p, u_##type v) { \
u_##type t; \
- __ATOMIC_BARRIER; \
+ __ATOMIC_REL(); \
__atomic_subtract_##type(p, v, t); \
} \
/* _ATOMIC_SUBTRACT */
@@ -481,14 +499,14 @@
u_##TYPE v; \
\
v = *p; \
- __ATOMIC_BARRIER; \
+ mb(); \
return (v); \
} \
\
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
{ \
- __ATOMIC_BARRIER; \
+ mb(); \
*p = v; \
}
@@ -598,14 +616,14 @@
int retval;
retval = atomic_cmpset_int(p, cmpval, newval);
- __ATOMIC_BARRIER;
+ __ATOMIC_ACQ();
return (retval);
}
static __inline int
atomic_cmpset_rel_int(volatile u_int *p, u_int cmpval, u_int newval)
{
- __ATOMIC_BARRIER;
+ __ATOMIC_REL();
return (atomic_cmpset_int(p, cmpval, newval));
}
@@ -615,14 +633,14 @@
u_long retval;
retval = atomic_cmpset_long(p, cmpval, newval);
- __ATOMIC_BARRIER;
+ __ATOMIC_ACQ();
return (retval);
}
static __inline int
atomic_cmpset_rel_long(volatile u_long *p, u_long cmpval, u_long newval)
{
- __ATOMIC_BARRIER;
+ __ATOMIC_REL();
return (atomic_cmpset_long(p, cmpval, newval));
}
@@ -672,4 +690,7 @@
#define atomic_fetchadd_64 atomic_fetchadd_long
#endif
+#undef __ATOMIC_REL
+#undef __ATOMIC_ACQ
+
#endif /* ! _MACHINE_ATOMIC_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/cpu.h
--- a/head/sys/powerpc/include/cpu.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/cpu.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: cpu.h,v 1.11 2000/05/26 21:19:53 thorpej Exp $
- * $FreeBSD: head/sys/powerpc/include/cpu.h 234156 2012-04-11 22:23:50Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/include/cpu.h 234785 2012-04-29 11:04:31Z dim $
*/
#ifndef _MACHINE_CPU_H_
@@ -99,6 +99,6 @@
void swi_vm(void *);
/* XXX the following should not be here. */
-void savectx(struct pcb *);
+void savectx(struct pcb *) __returns_twice;
#endif /* _MACHINE_CPU_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/cpufunc.h
--- a/head/sys/powerpc/include/cpufunc.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/cpufunc.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,22 +23,12 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/cpufunc.h 234589 2012-04-22 21:55:19Z nwhitehorn $
*/
#ifndef _MACHINE_CPUFUNC_H_
#define _MACHINE_CPUFUNC_H_
-/*
- * Required for user-space atomic.h includes
- */
-static __inline void
-powerpc_mb(void)
-{
-
- __asm __volatile("eieio; sync" : : : "memory");
-}
-
#ifdef _KERNEL
#include <sys/types.h>
@@ -176,21 +166,21 @@
eieio(void)
{
- __asm __volatile ("eieio");
+ __asm __volatile ("eieio" : : : "memory");
}
static __inline void
isync(void)
{
- __asm __volatile ("isync");
+ __asm __volatile ("isync" : : : "memory");
}
static __inline void
powerpc_sync(void)
{
- __asm __volatile ("sync");
+ __asm __volatile ("sync" : : : "memory");
}
static __inline register_t
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/elf.h
--- a/head/sys/powerpc/include/elf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/elf.h Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/elf.h 237430 2012-06-22 06:38:31Z kib $
*/
#ifndef _MACHINE_ELF_H_
@@ -106,8 +106,9 @@
#define AT_PAGESIZES 18 /* Pagesizes. */
#define AT_PAGESIZESLEN 19 /* Number of pagesizes. */
#define AT_STACKPROT 21 /* Initial stack protection. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
-#define AT_COUNT 22 /* Count of defined aux entry types. */
+#define AT_COUNT 23 /* Count of defined aux entry types. */
/*
* Relocation types.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/hid.h
--- a/head/sys/powerpc/include/hid.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/hid.h Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: hid.h,v 1.2 2001/08/22 21:05:25 matt Exp $
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/hid.h 236025 2012-05-25 21:12:24Z raj $
*/
#ifndef _POWERPC_HID_H_
@@ -78,6 +78,8 @@
#define HID0_E500_SEL_TBCLK 0x00002000 /* Select Time Base clock */
#define HID0_E500_MAS7UPDEN 0x00000080 /* Enable MAS7 update (e500v2) */
+#define HID0_E500MC_L2MMU_MHD 0x40000000 /* L2MMU Multiple Hit Detection */
+
#define HID0_BITMASK \
"\20" \
"\040EMCP\037DBP\036EBA\035EBD\034BCLK\033EICE\032ECLK\031PAR" \
@@ -105,6 +107,20 @@
"\027NAP\025DPM\023TG\022HANGDETECT\021NHR\020INORDER" \
"\016TBCTRL\015TBEN\012CIABREN\011HDICEEN\001ENATTN"
+#define HID0_E500MC_BITMASK \
+ "\20" \
+ "\040EMCP\037EN_L2MMU_MHD\036b2\035b3\034b4\033b5\032b6\031b7" \
+ "\030b8\027b9\026b10\025b11\024b12\023b13\022b14\021b15" \
+ "\020b16\017b17\016b18\015b19\014b20\013b21\012b22\011b23" \
+ "\010EN_MAS7_UPDATE\007DCFA\006b26\005CIGLSO\004b28\003b29\002b30\001NOPTI"
+
+#define HID0_E5500_BITMASK \
+ "\20" \
+ "\040EMCP\037EN_L2MMU_MHD\036b2\035b3\034b4\033b5\032b6\031b7" \
+ "\030b8\027b9\026b10\025b11\024b12\023b13\022b14\021b15" \
+ "\020b16\017b17\016b18\015b19\014b20\013b21\012b22\011b23" \
+ "\010b24\007DCFA\006b26\005CIGLSO\004b28\003b29\002b30\001NOPTI"
+
/*
* HID0 bit definitions per cpu model
*
@@ -142,6 +158,40 @@
* 30 - - - NOPDST NOPDST NOPDST NOPDST -
* 31 NOOPTI - NOOPTI NOPTI NOPTI NOPTI NOPTI NOPTI
*
+ * bit e500mc e5500
+ * 0 EMCP EMCP
+ * 1 EN_L2MMU_MHD EN_L2MMU_MHD
+ * 2 - -
+ * 3 - -
+ * 4 - -
+ * 5 - -
+ * 6 - -
+ * 7 - -
+ * 8 - -
+ * 9 - -
+ * 10 - -
+ * 11 - -
+ * 12 - -
+ * 13 - -
+ * 14 - -
+ * 15 - -
+ * 16 - -
+ * 17 - -
+ * 18 - -
+ * 19 - -
+ * 20 - -
+ * 21 - -
+ * 22 - -
+ * 23 - -
+ * 24 EN_MAS7_UPDATE -
+ * 25 DCFA DCFA
+ * 26 - -
+ * 27 CIGLSO CIGLSO
+ * 28 - -
+ * 29 - -
+ * 30 - -
+ * 31 NOPTI NOPTI
+ *
* 604: ECP = Enable cache parity checking
* 604: SIE = Serial instruction execution disable
* 7450: TBEN = Time Base Enable
@@ -160,6 +210,9 @@
#define HID0_E500_DEFAULT_SET (HID0_EMCP | HID0_E500_TBEN)
#define HID1_E500_DEFAULT_SET (HID1_E500_ABE | HID1_E500_ASTME)
+#define HID0_E500MC_DEFAULT_SET (HID0_EMCP | HID0_E500MC_L2MMU_MHD | \
+ HID0_E500_MAS7UPDEN)
+#define HID0_E5500_DEFAULT_SET (HID0_EMCP | HID0_E500MC_L2MMU_MHD)
#define HID5_970_DCBZ_SIZE_HI 0x00000080UL /* dcbz does a 32-byte store */
#define HID4_970_DISABLE_LG_PG 0x00000004ULL /* disables large pages */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/in_cksum.h
--- a/head/sys/powerpc/include/in_cksum.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/in_cksum.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* from tahoe: in_cksum.c 1.2 86/01/05
* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
* from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/in_cksum.h 235941 2012-05-24 22:00:48Z bz $
*/
#ifndef _MACHINE_IN_CKSUM_H_
@@ -39,6 +39,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -65,9 +66,12 @@
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/machdep.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/include/machdep.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2011-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/powerpc/include/machdep.h 236324 2012-05-30 17:34:40Z raj $
+ */
+
+#ifndef _POWERPC_MACHDEP_H_
+#define _POWERPC_MACHDEP_H_
+
+void booke_disable_l2_cache(void);
+void booke_enable_l1_cache(void);
+void booke_enable_l2_cache(void);
+void booke_enable_l3_cache(void);
+void booke_enable_bpred(void);
+void booke_init_tlb(vm_paddr_t);
+
+#endif /* _POWERPC_MACHDEP_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/pcpu.h
--- a/head/sys/powerpc/include/pcpu.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/pcpu.h Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/include/pcpu.h 230123 2012-01-15 00:08:14Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/include/pcpu.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_PCPU_H_
@@ -115,7 +115,7 @@
PCPU_MD_COMMON_FIELDS \
PCPU_MD_AIM_FIELDS
#endif
-#ifdef E500
+#if defined(BOOKE)
#define PCPU_MD_FIELDS \
PCPU_MD_COMMON_FIELDS \
PCPU_MD_BOOKE_FIELDS
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/pio.h
--- a/head/sys/powerpc/include/pio.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/pio.h Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
*
* $NetBSD: pio.h,v 1.1 1998/05/15 10:15:54 tsubai Exp $
* $OpenBSD: pio.h,v 1.1 1997/10/13 10:53:47 pefo Exp $
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/pio.h 235013 2012-05-04 16:00:22Z nwhitehorn $
*/
#ifndef _MACHINE_PIO_H_
@@ -39,46 +39,53 @@
* I/O macros.
*/
+/*
+ * Use sync so that bus space operations cannot sneak out the bottom of
+ * mutex-protected sections (mutex release does not guarantee completion of
+ * accesses to caching-inhibited memory on some systems)
+ */
+#define powerpc_iomb() __asm __volatile("sync" : : : "memory")
+
static __inline void
__outb(volatile u_int8_t *a, u_int8_t v)
{
*a = v;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
__outw(volatile u_int16_t *a, u_int16_t v)
{
*a = v;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
__outl(volatile u_int32_t *a, u_int32_t v)
{
*a = v;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
__outll(volatile u_int64_t *a, u_int64_t v)
{
*a = v;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
__outwrb(volatile u_int16_t *a, u_int16_t v)
{
__asm__ volatile("sthbrx %0, 0, %1" :: "r"(v), "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
__outlrb(volatile u_int32_t *a, u_int32_t v)
{
__asm__ volatile("stwbrx %0, 0, %1" :: "r"(v), "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline u_int8_t
@@ -87,7 +94,7 @@
u_int8_t _v_;
_v_ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -97,7 +104,7 @@
u_int16_t _v_;
_v_ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -107,7 +114,7 @@
u_int32_t _v_;
_v_ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -117,7 +124,7 @@
u_int64_t _v_;
_v_ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -127,7 +134,7 @@
u_int16_t _v_;
__asm__ volatile("lhbrx %0, 0, %1" : "=r"(_v_) : "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -137,7 +144,7 @@
u_int32_t _v_;
__asm__ volatile("lwbrx %0, 0, %1" : "=r"(_v_) : "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
return _v_;
}
@@ -175,7 +182,7 @@
{
while (c--)
*a = *s++;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -183,7 +190,7 @@
{
while (c--)
*a = *s++;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -191,7 +198,7 @@
{
while (c--)
*a = *s++;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -199,7 +206,7 @@
{
while (c--)
*a = *s++;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -207,7 +214,7 @@
{
while (c--)
__asm__ volatile("sthbrx %0, 0, %1" :: "r"(*s++), "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -215,7 +222,7 @@
{
while (c--)
__asm__ volatile("stwbrx %0, 0, %1" :: "r"(*s++), "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -223,7 +230,7 @@
{
while (c--)
*d++ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -231,7 +238,7 @@
{
while (c--)
*d++ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -239,7 +246,7 @@
{
while (c--)
*d++ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -247,7 +254,7 @@
{
while (c--)
*d++ = *a;
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -255,7 +262,7 @@
{
while (c--)
__asm__ volatile("lhbrx %0, 0, %1" : "=r"(*d++) : "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
static __inline void
@@ -263,7 +270,7 @@
{
while (c--)
__asm__ volatile("lwbrx %0, 0, %1" : "=r"(*d++) : "r"(a));
- __asm__ volatile("eieio; sync");
+ powerpc_iomb();
}
#define outsb(a,s,c) (__outsb((volatile u_int8_t *)(a), s, c))
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/pmap.h
--- a/head/sys/powerpc/include/pmap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/pmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: head/sys/powerpc/include/pmap.h 233948 2012-04-06 16:00:37Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/include/pmap.h 237168 2012-06-16 18:56:19Z alc $
*/
/*-
* Copyright (C) 1995, 1996 Wolfgang Solfrank.
@@ -94,7 +94,7 @@
struct pvo_entry {
LIST_ENTRY(pvo_entry) pvo_vlink; /* Link to common virt page */
LIST_ENTRY(pvo_entry) pvo_olink; /* Link to overflow entry */
- LIST_ENTRY(pvo_entry) pvo_plink; /* Link to pmap entries */
+ RB_ENTRY(pvo_entry) pvo_plink; /* Link to pmap entries */
union {
struct pte pte; /* 32 bit PTE */
struct lpte lpte; /* 64 bit PTE */
@@ -104,6 +104,9 @@
uint64_t pvo_vpn; /* Virtual page number */
};
LIST_HEAD(pvo_head, pvo_entry);
+RB_HEAD(pvo_tree, pvo_entry);
+int pvo_vaddr_compare(struct pvo_entry *, struct pvo_entry *);
+RB_PROTOTYPE(pvo_tree, pvo_entry, pvo_plink, pvo_vaddr_compare);
#define PVO_PTEGIDX_MASK 0x007UL /* which PTEG slot */
#define PVO_PTEGIDX_VALID 0x008UL /* slot is valid */
@@ -136,7 +139,7 @@
struct pmap *pmap_phys;
struct pmap_statistics pm_stats;
- struct pvo_head pmap_pvo;
+ struct pvo_tree pmap_pvo;
};
struct md_page {
@@ -220,17 +223,19 @@
#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx)
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
+
void pmap_bootstrap(vm_offset_t, vm_offset_t);
-void pmap_kenter(vm_offset_t va, vm_offset_t pa);
+void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void pmap_kenter_attr(vm_offset_t va, vm_offset_t pa, vm_memattr_t);
void pmap_kremove(vm_offset_t);
-void *pmap_mapdev(vm_offset_t, vm_size_t);
+void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_offset_t, vm_size_t, vm_memattr_t);
void pmap_unmapdev(vm_offset_t, vm_size_t);
void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
void pmap_deactivate(struct thread *);
-vm_offset_t pmap_kextract(vm_offset_t);
-int pmap_dev_direct_mapped(vm_offset_t, vm_size_t);
+vm_paddr_t pmap_kextract(vm_offset_t);
+int pmap_dev_direct_mapped(vm_paddr_t, vm_size_t);
boolean_t pmap_mmu_install(char *name, int prio);
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/profile.h
--- a/head/sys/powerpc/include/profile.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/profile.h Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*
* from: NetBSD: profile.h,v 1.9 1997/04/06 08:47:37 cgd Exp
* from: FreeBSD: src/sys/alpha/include/profile.h,v 1.4 1999/12/29
- * $FreeBSD: head/sys/powerpc/include/profile.h 230400 2012-01-20 22:34:19Z andreast $
+ * $FreeBSD: head/sys/powerpc/include/profile.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_PROFILE_H_
@@ -85,7 +85,7 @@
"_mcount: \n" \
" .quad .L._mcount,.TOC. at tocbase,0\n" \
" .previous \n" \
- " .size main,24 \n" \
+ " .size _mcount,24 \n" \
" .type _mcount, at function \n" \
" .align 4 \n" \
".L._mcount: \n" \
@@ -172,12 +172,13 @@
#define __PROFILE_VECTOR_BASE EXC_RST
#define __PROFILE_VECTOR_TOP (EXC_LAST + 0x100)
#endif /* AIM */
-#ifdef E500
+#if defined(BOOKE)
extern char interrupt_vector_base[];
extern char interrupt_vector_top[];
#define __PROFILE_VECTOR_BASE (uintfptr_t)interrupt_vector_base
#define __PROFILE_VECTOR_TOP (uintfptr_t)interrupt_vector_top
-#endif /* E500 */
+#endif /* BOOKE */
+
#endif /* !COMPILING_LINT */
#ifndef __PROFILE_VECTOR_BASE
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/psl.h
--- a/head/sys/powerpc/include/psl.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/psl.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,13 +29,13 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: psl.h,v 1.5 2000/11/19 19:52:37 matt Exp $
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/psl.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_PSL_H_
#define _MACHINE_PSL_H_
-#if defined(E500)
+#if defined(BOOKE_E500)
/*
* Machine State Register (MSR) - e500 core
*
@@ -67,7 +67,29 @@
#define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE)
#define PSL_USERSET (PSL_KERNSET | PSL_PR)
-#else /* if defined(E500) */
+#elif defined(BOOKE_PPC4XX)
+/*
+ * Machine State Register (MSR) - PPC4xx core
+ */
+#define PSL_WE (0x80000000 >> 13) /* Wait State Enable */
+#define PSL_CE (0x80000000 >> 14) /* Critical Interrupt Enable */
+#define PSL_EE (0x80000000 >> 16) /* External Interrupt Enable */
+#define PSL_PR (0x80000000 >> 17) /* Problem State */
+#define PSL_FP (0x80000000 >> 18) /* Floating Point Available */
+#define PSL_ME (0x80000000 >> 19) /* Machine Check Enable */
+#define PSL_FE0 (0x80000000 >> 20) /* Floating-point exception mode 0 */
+#define PSL_DWE (0x80000000 >> 21) /* Debug Wait Enable */
+#define PSL_DE (0x80000000 >> 22) /* Debug interrupt Enable */
+#define PSL_FE1 (0x80000000 >> 23) /* Floating-point exception mode 1 */
+#define PSL_IS (0x80000000 >> 26) /* Instruction Address Space */
+#define PSL_DS (0x80000000 >> 27) /* Data Address Space */
+
+#define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE | PSL_FP)
+#define PSL_USERSET (PSL_KERNSET | PSL_PR)
+
+#define PSL_FE_DFLT 0x00000000UL /* default == none */
+
+#else /* if defined(BOOKE_*) */
/*
* Machine State Register (MSR)
*
@@ -127,5 +149,5 @@
#define PSL_USERSTATIC (PSL_USERSET | PSL_IP | 0x87c0008c)
-#endif /* if defined(E500) */
+#endif /* if defined(BOOKE_E500) */
#endif /* _MACHINE_PSL_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/pte.h
--- a/head/sys/powerpc/include/pte.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/pte.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: pte.h,v 1.2 1998/08/31 14:43:40 tsubai Exp $
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/pte.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_PTE_H_
@@ -163,7 +163,7 @@
#endif /* _KERNEL */
#endif /* LOCORE */
-#else
+#else /* BOOKE */
#include <machine/tlb.h>
@@ -224,6 +224,8 @@
/* RPN mask, TLB0 4K pages */
#define PTE_PA_MASK PAGE_MASK
+#if defined(BOOKE_E500)
+
/* PTE bits assigned to MAS2, MAS3 flags */
#define PTE_W MAS2_W
#define PTE_I MAS2_I
@@ -241,6 +243,26 @@
#define PTE_MAS3_MASK ((MAS3_UX | MAS3_SX | MAS3_UW \
| MAS3_SW | MAS3_UR | MAS3_SR) << PTE_MAS3_SHIFT)
+#elif defined(BOOKE_PPC4XX)
+
+#define PTE_WL1 TLB_WL1
+#define PTE_IL2I TLB_IL2I
+#define PTE_IL2D TLB_IL2D
+
+#define PTE_W TLB_W
+#define PTE_I TLB_I
+#define PTE_M TLB_M
+#define PTE_G TLB_G
+
+#define PTE_UX TLB_UX
+#define PTE_SX TLB_SX
+#define PTE_UW TLB_UW
+#define PTE_SW TLB_SW
+#define PTE_UR TLB_UR
+#define PTE_SR TLB_SR
+
+#endif
+
/* Other PTE flags */
#define PTE_VALID 0x80000000 /* Valid */
#define PTE_MODIFIED 0x40000000 /* Modified */
@@ -256,6 +278,5 @@
#define PTE_ISMODIFIED(pte) ((pte)->flags & PTE_MODIFIED)
#define PTE_ISREFERENCED(pte) ((pte)->flags & PTE_REFERENCED)
-#endif /* #elif defined(E500) */
-
+#endif /* BOOKE_PPC4XX */
#endif /* _MACHINE_PTE_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/spr.h
--- a/head/sys/powerpc/include/spr.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/spr.h Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: spr.h,v 1.25 2002/08/14 15:38:40 matt Exp $
- * $FreeBSD: head/sys/powerpc/include/spr.h 228869 2011-12-24 19:34:52Z jhibbits $
+ * $FreeBSD: head/sys/powerpc/include/spr.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _POWERPC_SPR_H_
#define _POWERPC_SPR_H_
@@ -115,9 +115,9 @@
#define SRR1_ISI_NOEXECUTE 0x10000000 /* Memory marked no-execute */
#define SRR1_ISI_PP 0x08000000 /* PP bits forbid access */
#define SPR_DECAR 0x036 /* ..8 Decrementer auto reload */
-#define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */
-#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
-#define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */
+#define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */
+#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
+#define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */
#define SPR_USPRG0 0x100 /* 4.. User SPR General 0 */
#define SPR_VRSAVE 0x100 /* .6. AltiVec VRSAVE */
#define SPR_SPRG0 0x110 /* 468 SPR General 0 */
@@ -184,6 +184,8 @@
#define MPC8245 0x8081
#define FSL_E500v1 0x8020
#define FSL_E500v2 0x8021
+#define FSL_E500mc 0x8023
+#define FSL_E5500 0x8024
#define SPR_IBAT0U 0x210 /* .68 Instruction BAT Reg 0 Upper */
#define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */
@@ -202,49 +204,49 @@
#define SPR_DBAT2L 0x21d /* .6. Data BAT Reg 2 Lower */
#define SPR_DBAT3U 0x21e /* .6. Data BAT Reg 3 Upper */
#define SPR_DBAT3L 0x21f /* .6. Data BAT Reg 3 Lower */
-#define SPR_IC_CST 0x230 /* ..8 Instruction Cache CSR */
-#define IC_CST_IEN 0x80000000 /* I cache is ENabled (RO) */
-#define IC_CST_CMD_INVALL 0x0c000000 /* I cache invalidate all */
-#define IC_CST_CMD_UNLOCKALL 0x0a000000 /* I cache unlock all */
-#define IC_CST_CMD_UNLOCK 0x08000000 /* I cache unlock block */
-#define IC_CST_CMD_LOADLOCK 0x06000000 /* I cache load & lock block */
-#define IC_CST_CMD_DISABLE 0x04000000 /* I cache disable */
-#define IC_CST_CMD_ENABLE 0x02000000 /* I cache enable */
-#define IC_CST_CCER1 0x00200000 /* I cache error type 1 (RO) */
-#define IC_CST_CCER2 0x00100000 /* I cache error type 2 (RO) */
-#define IC_CST_CCER3 0x00080000 /* I cache error type 3 (RO) */
+#define SPR_IC_CST 0x230 /* ..8 Instruction Cache CSR */
+#define IC_CST_IEN 0x80000000 /* I cache is ENabled (RO) */
+#define IC_CST_CMD_INVALL 0x0c000000 /* I cache invalidate all */
+#define IC_CST_CMD_UNLOCKALL 0x0a000000 /* I cache unlock all */
+#define IC_CST_CMD_UNLOCK 0x08000000 /* I cache unlock block */
+#define IC_CST_CMD_LOADLOCK 0x06000000 /* I cache load & lock block */
+#define IC_CST_CMD_DISABLE 0x04000000 /* I cache disable */
+#define IC_CST_CMD_ENABLE 0x02000000 /* I cache enable */
+#define IC_CST_CCER1 0x00200000 /* I cache error type 1 (RO) */
+#define IC_CST_CCER2 0x00100000 /* I cache error type 2 (RO) */
+#define IC_CST_CCER3 0x00080000 /* I cache error type 3 (RO) */
#define SPR_IBAT4U 0x230 /* .6. Instruction BAT Reg 4 Upper */
-#define SPR_IC_ADR 0x231 /* ..8 Instruction Cache Address */
+#define SPR_IC_ADR 0x231 /* ..8 Instruction Cache Address */
#define SPR_IBAT4L 0x231 /* .6. Instruction BAT Reg 4 Lower */
-#define SPR_IC_DAT 0x232 /* ..8 Instruction Cache Data */
+#define SPR_IC_DAT 0x232 /* ..8 Instruction Cache Data */
#define SPR_IBAT5U 0x232 /* .6. Instruction BAT Reg 5 Upper */
#define SPR_IBAT5L 0x233 /* .6. Instruction BAT Reg 5 Lower */
#define SPR_IBAT6U 0x234 /* .6. Instruction BAT Reg 6 Upper */
#define SPR_IBAT6L 0x235 /* .6. Instruction BAT Reg 6 Lower */
#define SPR_IBAT7U 0x236 /* .6. Instruction BAT Reg 7 Upper */
#define SPR_IBAT7L 0x237 /* .6. Instruction BAT Reg 7 Lower */
-#define SPR_DC_CST 0x230 /* ..8 Data Cache CSR */
-#define DC_CST_DEN 0x80000000 /* D cache ENabled (RO) */
-#define DC_CST_DFWT 0x40000000 /* D cache Force Write-Thru (RO) */
-#define DC_CST_LES 0x20000000 /* D cache Little Endian Swap (RO) */
-#define DC_CST_CMD_FLUSH 0x0e000000 /* D cache invalidate all */
-#define DC_CST_CMD_INVALL 0x0c000000 /* D cache invalidate all */
-#define DC_CST_CMD_UNLOCKALL 0x0a000000 /* D cache unlock all */
-#define DC_CST_CMD_UNLOCK 0x08000000 /* D cache unlock block */
-#define DC_CST_CMD_CLRLESWAP 0x07000000 /* D cache clr little-endian swap */
-#define DC_CST_CMD_LOADLOCK 0x06000000 /* D cache load & lock block */
-#define DC_CST_CMD_SETLESWAP 0x05000000 /* D cache set little-endian swap */
-#define DC_CST_CMD_DISABLE 0x04000000 /* D cache disable */
-#define DC_CST_CMD_CLRFWT 0x03000000 /* D cache clear forced write-thru */
-#define DC_CST_CMD_ENABLE 0x02000000 /* D cache enable */
-#define DC_CST_CMD_SETFWT 0x01000000 /* D cache set forced write-thru */
-#define DC_CST_CCER1 0x00200000 /* D cache error type 1 (RO) */
-#define DC_CST_CCER2 0x00100000 /* D cache error type 2 (RO) */
-#define DC_CST_CCER3 0x00080000 /* D cache error type 3 (RO) */
+#define SPR_DC_CST 0x230 /* ..8 Data Cache CSR */
+#define DC_CST_DEN 0x80000000 /* D cache ENabled (RO) */
+#define DC_CST_DFWT 0x40000000 /* D cache Force Write-Thru (RO) */
+#define DC_CST_LES 0x20000000 /* D cache Little Endian Swap (RO) */
+#define DC_CST_CMD_FLUSH 0x0e000000 /* D cache invalidate all */
+#define DC_CST_CMD_INVALL 0x0c000000 /* D cache invalidate all */
+#define DC_CST_CMD_UNLOCKALL 0x0a000000 /* D cache unlock all */
+#define DC_CST_CMD_UNLOCK 0x08000000 /* D cache unlock block */
+#define DC_CST_CMD_CLRLESWAP 0x07000000 /* D cache clr little-endian swap */
+#define DC_CST_CMD_LOADLOCK 0x06000000 /* D cache load & lock block */
+#define DC_CST_CMD_SETLESWAP 0x05000000 /* D cache set little-endian swap */
+#define DC_CST_CMD_DISABLE 0x04000000 /* D cache disable */
+#define DC_CST_CMD_CLRFWT 0x03000000 /* D cache clear forced write-thru */
+#define DC_CST_CMD_ENABLE 0x02000000 /* D cache enable */
+#define DC_CST_CMD_SETFWT 0x01000000 /* D cache set forced write-thru */
+#define DC_CST_CCER1 0x00200000 /* D cache error type 1 (RO) */
+#define DC_CST_CCER2 0x00100000 /* D cache error type 2 (RO) */
+#define DC_CST_CCER3 0x00080000 /* D cache error type 3 (RO) */
#define SPR_DBAT4U 0x238 /* .6. Data BAT Reg 4 Upper */
-#define SPR_DC_ADR 0x231 /* ..8 Data Cache Address */
+#define SPR_DC_ADR 0x231 /* ..8 Data Cache Address */
#define SPR_DBAT4L 0x239 /* .6. Data BAT Reg 4 Lower */
-#define SPR_DC_DAT 0x232 /* ..8 Data Cache Data */
+#define SPR_DC_DAT 0x232 /* ..8 Data Cache Data */
#define SPR_DBAT5U 0x23a /* .6. Data BAT Reg 5 Upper */
#define SPR_DBAT5L 0x23b /* .6. Data BAT Reg 5 Lower */
#define SPR_DBAT6U 0x23c /* .6. Data BAT Reg 6 Upper */
@@ -252,46 +254,46 @@
#define SPR_DBAT7U 0x23e /* .6. Data BAT Reg 7 Upper */
#define SPR_DBAT7L 0x23f /* .6. Data BAT Reg 7 Lower */
#define SPR_MI_CTR 0x310 /* ..8 IMMU control */
-#define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */
-#define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */
-#define Mx_CTR_CIDEF 0x20000000 /* Cache-Inhibit DEFault */
-#define MD_CTR_WTDEF 0x20000000 /* Write-Through DEFault */
-#define Mx_CTR_RSV4 0x08000000 /* Reserve 4 TLB entries */
-#define MD_CTR_TWAM 0x04000000 /* TableWalk Assist Mode */
-#define Mx_CTR_PPCS 0x02000000 /* Priv/user state compare mode */
-#define Mx_CTR_TLB_INDX 0x000001f0 /* TLB index mask */
-#define Mx_CTR_TLB_INDX_BITPOS 8 /* TLB index shift */
+#define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */
+#define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */
+#define Mx_CTR_CIDEF 0x20000000 /* Cache-Inhibit DEFault */
+#define MD_CTR_WTDEF 0x20000000 /* Write-Through DEFault */
+#define Mx_CTR_RSV4 0x08000000 /* Reserve 4 TLB entries */
+#define MD_CTR_TWAM 0x04000000 /* TableWalk Assist Mode */
+#define Mx_CTR_PPCS 0x02000000 /* Priv/user state compare mode */
+#define Mx_CTR_TLB_INDX 0x000001f0 /* TLB index mask */
+#define Mx_CTR_TLB_INDX_BITPOS 8 /* TLB index shift */
#define SPR_MI_AP 0x312 /* ..8 IMMU access protection */
-#define Mx_GP_SUPER(n) (0 << (2*(15-(n)))) /* access is supervisor */
-#define Mx_GP_PAGE (1 << (2*(15-(n)))) /* access is page protect */
-#define Mx_GP_SWAPPED (2 << (2*(15-(n)))) /* access is swapped */
-#define Mx_GP_USER (3 << (2*(15-(n)))) /* access is user */
+#define Mx_GP_SUPER(n) (0 << (2*(15-(n)))) /* access is supervisor */
+#define Mx_GP_PAGE (1 << (2*(15-(n)))) /* access is page protect */
+#define Mx_GP_SWAPPED (2 << (2*(15-(n)))) /* access is swapped */
+#define Mx_GP_USER (3 << (2*(15-(n)))) /* access is user */
#define SPR_MI_EPN 0x313 /* ..8 IMMU effective number */
-#define Mx_EPN_EPN 0xfffff000 /* Effective Page Number mask */
-#define Mx_EPN_EV 0x00000020 /* Entry Valid */
-#define Mx_EPN_ASID 0x0000000f /* Address Space ID */
+#define Mx_EPN_EPN 0xfffff000 /* Effective Page Number mask */
+#define Mx_EPN_EV 0x00000020 /* Entry Valid */
+#define Mx_EPN_ASID 0x0000000f /* Address Space ID */
#define SPR_MI_TWC 0x315 /* ..8 IMMU tablewalk control */
-#define MD_TWC_L2TB 0xfffff000 /* Level-2 Tablewalk Base */
-#define Mx_TWC_APG 0x000001e0 /* Access Protection Group */
-#define Mx_TWC_G 0x00000010 /* Guarded memory */
-#define Mx_TWC_PS 0x0000000c /* Page Size (L1) */
-#define MD_TWC_WT 0x00000002 /* Write-Through */
-#define Mx_TWC_V 0x00000001 /* Entry Valid */
+#define MD_TWC_L2TB 0xfffff000 /* Level-2 Tablewalk Base */
+#define Mx_TWC_APG 0x000001e0 /* Access Protection Group */
+#define Mx_TWC_G 0x00000010 /* Guarded memory */
+#define Mx_TWC_PS 0x0000000c /* Page Size (L1) */
+#define MD_TWC_WT 0x00000002 /* Write-Through */
+#define Mx_TWC_V 0x00000001 /* Entry Valid */
#define SPR_MI_RPN 0x316 /* ..8 IMMU real (phys) page number */
-#define Mx_RPN_RPN 0xfffff000 /* Real Page Number */
-#define Mx_RPN_PP 0x00000ff0 /* Page Protection */
-#define Mx_RPN_SPS 0x00000008 /* Small Page Size */
-#define Mx_RPN_SH 0x00000004 /* SHared page */
-#define Mx_RPN_CI 0x00000002 /* Cache Inhibit */
-#define Mx_RPN_V 0x00000001 /* Valid */
+#define Mx_RPN_RPN 0xfffff000 /* Real Page Number */
+#define Mx_RPN_PP 0x00000ff0 /* Page Protection */
+#define Mx_RPN_SPS 0x00000008 /* Small Page Size */
+#define Mx_RPN_SH 0x00000004 /* SHared page */
+#define Mx_RPN_CI 0x00000002 /* Cache Inhibit */
+#define Mx_RPN_V 0x00000001 /* Valid */
#define SPR_MD_CTR 0x318 /* ..8 DMMU control */
#define SPR_M_CASID 0x319 /* ..8 CASID */
-#define M_CASID 0x0000000f /* Current AS Id */
+#define M_CASID 0x0000000f /* Current AS Id */
#define SPR_MD_AP 0x31a /* ..8 DMMU access protection */
#define SPR_MD_EPN 0x31b /* ..8 DMMU effective number */
#define SPR_M_TWB 0x31c /* ..8 MMU tablewalk base */
-#define M_TWB_L1TB 0xfffff000 /* level-1 translation base */
-#define M_TWB_L1INDX 0x00000ffc /* level-1 index */
+#define M_TWB_L1TB 0xfffff000 /* level-1 translation base */
+#define M_TWB_L1INDX 0x00000ffc /* level-1 index */
#define SPR_MD_TWC 0x31d /* ..8 DMMU tablewalk control */
#define SPR_MD_RPN 0x31e /* ..8 DMMU real (phys) page number */
#define SPR_MD_TW 0x31f /* ..8 MMU tablewalk scratch */
@@ -307,8 +309,8 @@
#define SPR_UMMCR1 0x3ac /* .6. User Monitor Mode Control Register 1 */
#define SPR_ZPR 0x3b0 /* 4.. Zone Protection Register */
#define SPR_MMCR2 0x3b0 /* .6. Monitor Mode Control Register 2 */
-#define SPR_MMCR2_THRESHMULT_32 0x80000000 /* Multiply MMCR0 threshold by 32 */
-#define SPR_MMCR2_THRESHMULT_2 0x00000000 /* Multiply MMCR0 threshold by 2 */
+#define SPR_MMCR2_THRESHMULT_32 0x80000000 /* Multiply MMCR0 threshold by 32 */
+#define SPR_MMCR2_THRESHMULT_2 0x00000000 /* Multiply MMCR0 threshold by 2 */
#define SPR_PID 0x3b1 /* 4.. Process ID */
#define SPR_PMC5 0x3b1 /* .6. Performance Counter Register 5 */
#define SPR_PMC6 0x3b2 /* .6. Performance Counter Register 6 */
@@ -323,14 +325,14 @@
#define SPR_970MMCR1 0x31e /* ... Monitor Mode Control Register 1 (PPC 970) */
#define SPR_970MMCRA 0x312 /* ... Monitor Mode Control Register 2 (PPC 970) */
#define SPR_970MMCR0 0x31b /* ... Monitor Mode Control Register 0 (PPC 970) */
-#define SPR_970PMC1 0x313 /* ... PMC 1 */
-#define SPR_970PMC2 0x314 /* ... PMC 2 */
-#define SPR_970PMC3 0x315 /* ... PMC 3 */
-#define SPR_970PMC4 0x316 /* ... PMC 4 */
-#define SPR_970PMC5 0x317 /* ... PMC 5 */
-#define SPR_970PMC6 0x318 /* ... PMC 6 */
-#define SPR_970PMC7 0x319 /* ... PMC 7 */
-#define SPR_970PMC8 0x31a /* ... PMC 8 */
+#define SPR_970PMC1 0x313 /* ... PMC 1 */
+#define SPR_970PMC2 0x314 /* ... PMC 2 */
+#define SPR_970PMC3 0x315 /* ... PMC 3 */
+#define SPR_970PMC4 0x316 /* ... PMC 4 */
+#define SPR_970PMC5 0x317 /* ... PMC 5 */
+#define SPR_970PMC6 0x318 /* ... PMC 6 */
+#define SPR_970PMC7 0x319 /* ... PMC 7 */
+#define SPR_970PMC8 0x31a /* ... PMC 8 */
#define SPR_MMCR0_FC 0x80000000 /* Freeze counters */
#define SPR_MMCR0_FCS 0x40000000 /* Freeze counters in supervisor mode */
@@ -421,6 +423,7 @@
#define SPR_SRR3 0x3df /* 4.. Save/Restore Register 3 */
#define SPR_HID0 0x3f0 /* ..8 Hardware Implementation Register 0 */
#define SPR_HID1 0x3f1 /* ..8 Hardware Implementation Register 1 */
+#define SPR_HID2 0x3f3 /* ..8 Hardware Implementation Register 2 */
#define SPR_HID4 0x3f4 /* ..8 Hardware Implementation Register 4 */
#define SPR_HID5 0x3f6 /* ..8 Hardware Implementation Register 5 */
#define SPR_HID6 0x3f9 /* ..8 Hardware Implementation Register 6 */
@@ -452,7 +455,7 @@
#define SPR_DAC1 0x3f6 /* 4.. Data Address Compare 1 */
#define SPR_DAC2 0x3f7 /* 4.. Data Address Compare 2 */
#define SPR_PIR 0x3ff /* .6. Processor Identification Register */
-#elif defined(E500)
+#elif defined(BOOKE)
#define SPR_PIR 0x11e /* ..8 Processor Identification Register */
#define SPR_DBSR 0x130 /* ..8 Debug Status Register */
#define DBSR_IDE 0x80000000 /* Imprecise debug event. */
@@ -554,7 +557,6 @@
#define L2CR_L2DRO 0x00000100 /* 23: L2DLL rollover checkstop enable. */
#define L2CR_L2IP 0x00000001 /* 31: L2 global invalidate in */
/* progress (read only). */
-
#define SPR_L3CR 0x3fa /* .6. L3 Control Register */
#define L3CR_L3E 0x80000000 /* 0: L3 enable */
#define L3CR_L3PE 0x40000000 /* 1: L3 data parity enable */
@@ -582,15 +584,15 @@
#define SPR_ICCR 0x3fb /* 4.. Instruction Cache Cachability Register */
#define SPR_THRM1 0x3fc /* .6. Thermal Management Register */
#define SPR_THRM2 0x3fd /* .6. Thermal Management Register */
-#define SPR_THRM_TIN 0x80000000 /* Thermal interrupt bit (RO) */
-#define SPR_THRM_TIV 0x40000000 /* Thermal interrupt valid (RO) */
-#define SPR_THRM_THRESHOLD(x) ((x) << 23) /* Thermal sensor threshold */
-#define SPR_THRM_TID 0x00000004 /* Thermal interrupt direction */
-#define SPR_THRM_TIE 0x00000002 /* Thermal interrupt enable */
-#define SPR_THRM_VALID 0x00000001 /* Valid bit */
+#define SPR_THRM_TIN 0x80000000 /* Thermal interrupt bit (RO) */
+#define SPR_THRM_TIV 0x40000000 /* Thermal interrupt valid (RO) */
+#define SPR_THRM_THRESHOLD(x) ((x) << 23) /* Thermal sensor threshold */
+#define SPR_THRM_TID 0x00000004 /* Thermal interrupt direction */
+#define SPR_THRM_TIE 0x00000002 /* Thermal interrupt enable */
+#define SPR_THRM_VALID 0x00000001 /* Valid bit */
#define SPR_THRM3 0x3fe /* .6. Thermal Management Register */
-#define SPR_THRM_TIMER(x) ((x) << 1) /* Sampling interval timer */
-#define SPR_THRM_ENABLE 0x00000001 /* TAU Enable */
+#define SPR_THRM_TIMER(x) ((x) << 1) /* Sampling interval timer */
+#define SPR_THRM_ENABLE 0x00000001 /* TAU Enable */
#define SPR_FPECR 0x3fe /* .6. Floating-Point Exception Cause Register */
/* Time Base Register declarations */
@@ -600,7 +602,7 @@
#define TBR_TBWU 0x11d /* 468 Time Base Upper - supervisor, write */
/* Performance counter declarations */
-#define PMC_OVERFLOW 0x80000000 /* Counter has overflowed */
+#define PMC_OVERFLOW 0x80000000 /* Counter has overflowed */
/* The first five countable [non-]events are common to many PMC's */
#define PMCN_NONE 0 /* Count nothing */
@@ -616,7 +618,7 @@
#if defined(AIM)
-#define SPR_ESR 0x3d4 /* 4.. Exception Syndrome Register */
+#define SPR_ESR 0x3d4 /* 4.. Exception Syndrome Register */
#define ESR_MCI 0x80000000 /* Machine check - instruction */
#define ESR_PIL 0x08000000 /* Program interrupt - illegal */
#define ESR_PPR 0x04000000 /* Program interrupt - privileged */
@@ -626,7 +628,9 @@
#define ESR_DIZ 0x00800000 /* Data/instruction storage interrupt - zone fault */
#define ESR_U0F 0x00008000 /* Data storage interrupt - U0 fault */
-#elif defined(E500)
+#elif defined(BOOKE)
+
+#define SPR_MCSR 0x23c /* ..8 Machine Check Syndrome register */
#define SPR_ESR 0x003e /* ..8 Exception Syndrome Register */
#define ESR_PIL 0x08000000 /* Program interrupt - illegal */
@@ -643,6 +647,19 @@
#define SPR_MCSRR0 0x23a /* ..8 570 Machine check SRR0 */
#define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */
+#define SPR_MMUCR 0x3b2 /* 4.. MMU Control Register */
+#define MMUCR_SWOA (0x80000000 >> 7)
+#define MMUCR_U1TE (0x80000000 >> 9)
+#define MMUCR_U2SWOAE (0x80000000 >> 10)
+#define MMUCR_DULXE (0x80000000 >> 12)
+#define MMUCR_IULXE (0x80000000 >> 13)
+#define MMUCR_STS (0x80000000 >> 15)
+#define MMUCR_STID_MASK (0xFF000000 >> 24)
+
+#define SPR_MMUCSR0 0x3f4 /* ..8 1012 MMU Control and Status Register 0 */
+#define MMUCSR0_L2TLB0_FI 0x04 /* TLB0 flash invalidate */
+#define MMUCSR0_L2TLB1_FI 0x02 /* TLB1 flash invalidate */
+
#define SPR_SVR 0x3ff /* ..8 1023 System Version Register */
#define SVR_MPC8533 0x8034
#define SVR_MPC8533E 0x803c
@@ -662,10 +679,16 @@
#define SVR_P2010E 0x80eb
#define SVR_P2020 0x80e2
#define SVR_P2020E 0x80ea
+#define SVR_P2041 0x8210
+#define SVR_P2041E 0x8218
+#define SVR_P3041 0x8211
+#define SVR_P3041E 0x8219
#define SVR_P4040 0x8200
#define SVR_P4040E 0x8208
#define SVR_P4080 0x8201
#define SVR_P4080E 0x8209
+#define SVR_P5020 0x8220
+#define SVR_P5020E 0x8228
#define SVR_VER(svr) (((svr) >> 16) & 0xffff)
#define SPR_PID0 0x030 /* ..8 Process ID Register 0 */
@@ -708,6 +731,18 @@
#define SPR_MAS5 0x275 /* ..8 MMU Assist Register 5 Book-E */
#define SPR_MAS6 0x276 /* ..8 MMU Assist Register 6 Book-E/e500 */
#define SPR_MAS7 0x3B0 /* ..8 MMU Assist Register 7 Book-E/e500 */
+#define SPR_MAS8 0x155 /* ..8 MMU Assist Register 8 Book-E/e500 */
+
+#define SPR_L1CFG0 0x203 /* ..8 L1 cache configuration register 0 */
+#define SPR_L1CFG1 0x204 /* ..8 L1 cache configuration register 1 */
+
+#define SPR_CCR1 0x378
+#define CCR1_L2COBE 0x00000040
+
+#define DCR_L2DCDCRAI 0x0000 /* L2 D-Cache DCR Address Pointer */
+#define DCR_L2DCDCRDI 0x0001 /* L2 D-Cache DCR Data Indirect */
+#define DCR_L2CR0 0x00 /* L2 Cache Configuration Register 0 */
+#define L2CR0_AS 0x30000000
#define SPR_L1CSR0 0x3F2 /* ..8 L1 Cache Control and Status Register 0 */
#define L1CSR0_DCPE 0x00010000 /* Data Cache Parity Enable */
@@ -716,13 +751,20 @@
#define L1CSR0_DCE 0x00000001 /* Data Cache Enable */
#define SPR_L1CSR1 0x3F3 /* ..8 L1 Cache Control and Status Register 1 */
#define L1CSR1_ICPE 0x00010000 /* Instruction Cache Parity Enable */
+#define L1CSR1_ICUL 0x00000400 /* Instr Cache Unable to Lock */
#define L1CSR1_ICLFR 0x00000100 /* Instruction Cache Lock Bits Flash Reset */
#define L1CSR1_ICFI 0x00000002 /* Instruction Cache Flash Invalidate */
#define L1CSR1_ICE 0x00000001 /* Instruction Cache Enable */
+#define SPR_L2CSR0 0x3F9 /* ..8 L2 Cache Control and Status Register 0 */
+#define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */
+#define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity Enable */
+#define L2CSR0_L2FI 0x00200000 /* L2 Cache Flash Invalidate */
+#define L2CSR0_L2LFC 0x00000400 /* L2 Cache Lock Flags Clear */
+
#define SPR_BUCSR 0x3F5 /* ..8 Branch Unit Control and Status Register */
#define BUCSR_BPEN 0x00000001 /* Branch Prediction Enable */
+#define BUCSR_BBFI 0x00000200 /* Branch Buffer Flash Invalidate */
-#endif /* #elif defined(E500) */
-
+#endif /* BOOKE */
#endif /* !_POWERPC_SPR_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/tlb.h
--- a/head/sys/powerpc/include/tlb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/tlb.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,5 +1,5 @@
/*-
- * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8 at semihalf.com>
+ * Copyright (C) 2006-2012 Semihalf.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,12 +24,14 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/tlb.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_TLB_H_
#define _MACHINE_TLB_H_
+#if defined(BOOKE_E500)
+
/* PowerPC E500 MAS registers */
#define MAS0_TLBSEL(x) ((x << 28) & 0x10000000)
#define MAS0_ESEL(x) ((x << 16) & 0x000F0000)
@@ -122,6 +124,73 @@
#define _TLB_ENTRY_MEM (0)
#endif
+#if !defined(LOCORE)
+typedef struct tlb_entry {
+ uint32_t mas1;
+ uint32_t mas2;
+ uint32_t mas3;
+} tlb_entry_t;
+
+void tlb0_print_tlbentries(void);
+
+void tlb1_inval_entry(unsigned int);
+void tlb1_init(vm_offset_t);
+void tlb1_print_entries(void);
+void tlb1_print_tlbentries(void);
+#endif /* !LOCORE */
+
+#elif defined(BOOKE_PPC4XX)
+
+/* TLB Words */
+#define TLB_PAGEID 0
+#define TLB_XLAT 1
+#define TLB_ATTRIB 2
+
+/* Page identification fields */
+#define TLB_EPN_MASK (0xFFFFFC00 >> 0)
+#define TLB_VALID (0x80000000 >> 22)
+#define TLB_TS (0x80000000 >> 23)
+#define TLB_SIZE_1K (0x00000000 >> 24)
+#define TLB_SIZE_MASK (0xF0000000 >> 24)
+
+/* Translation fields */
+#define TLB_RPN_MASK (0xFFFFFC00 >> 0)
+#define TLB_ERPN_MASK (0xF0000000 >> 28)
+
+/* Storage attribute and access control fields */
+#define TLB_WL1 (0x80000000 >> 11)
+#define TLB_IL1I (0x80000000 >> 12)
+#define TLB_IL1D (0x80000000 >> 13)
+#define TLB_IL2I (0x80000000 >> 14)
+#define TLB_IL2D (0x80000000 >> 15)
+#define TLB_U0 (0x80000000 >> 16)
+#define TLB_U1 (0x80000000 >> 17)
+#define TLB_U2 (0x80000000 >> 18)
+#define TLB_U3 (0x80000000 >> 19)
+#define TLB_W (0x80000000 >> 20)
+#define TLB_I (0x80000000 >> 21)
+#define TLB_M (0x80000000 >> 22)
+#define TLB_G (0x80000000 >> 23)
+#define TLB_E (0x80000000 >> 24)
+#define TLB_UX (0x80000000 >> 26)
+#define TLB_UW (0x80000000 >> 27)
+#define TLB_UR (0x80000000 >> 28)
+#define TLB_SX (0x80000000 >> 29)
+#define TLB_SW (0x80000000 >> 30)
+#define TLB_SR (0x80000000 >> 31)
+#define TLB_SIZE 64
+
+#define TLB_SIZE_4K (0x10000000 >> 24)
+#define TLB_SIZE_16K (0x20000000 >> 24)
+#define TLB_SIZE_64K (0x30000000 >> 24)
+#define TLB_SIZE_256K (0x40000000 >> 24)
+#define TLB_SIZE_1M (0x50000000 >> 24)
+#define TLB_SIZE_16M (0x70000000 >> 24)
+#define TLB_SIZE_256M (0x90000000 >> 24)
+#define TLB_SIZE_1G (0xA0000000 >> 24)
+
+#endif /* BOOKE_E500 || BOOKE_PPC4XX */
+
#define TID_KERNEL 0 /* TLB TID to use for kernel (shared) translations */
#define TID_KRESERVED 1 /* Number of TIDs reserved for kernel */
#define TID_URESERVED 0 /* Number of TIDs reserved for user */
@@ -132,22 +201,11 @@
#define TLB_UNLOCKED 0
#if !defined(LOCORE)
-typedef struct tlb_entry {
- uint32_t mas1;
- uint32_t mas2;
- uint32_t mas3;
-} tlb_entry_t;
typedef int tlbtid_t;
+
struct pmap;
-void tlb0_print_tlbentries(void);
-
-void tlb1_inval_entry(unsigned int);
-void tlb1_init(vm_offset_t);
-void tlb1_print_entries(void);
-void tlb1_print_tlbentries(void);
-
void tlb_lock(uint32_t *);
void tlb_unlock(uint32_t *);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/trap.h
--- a/head/sys/powerpc/include/trap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/trap.h Wed Jul 25 16:40:53 2012 +0300
@@ -1,8 +1,8 @@
-/* $FreeBSD: head/sys/powerpc/include/trap.h 233635 2012-03-29 02:02:14Z nwhitehorn $ */
+/* $FreeBSD: head/sys/powerpc/include/trap.h 236141 2012-05-27 10:25:20Z raj $ */
#if defined(AIM)
#include <machine/trap_aim.h>
-#elif defined(E500)
+#elif defined(BOOKE)
#include <machine/trap_booke.h>
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/ucontext.h
--- a/head/sys/powerpc/include/ucontext.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/ucontext.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: signal.h,v 1.4 1998/09/14 02:48:34 thorpej Exp $
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/include/ucontext.h 234542 2012-04-21 14:39:47Z nwhitehorn $
*/
#ifndef _MACHINE_UCONTEXT_H_
@@ -71,9 +71,9 @@
#define mc_ctr mc_frame[35]
#define mc_srr0 mc_frame[36]
#define mc_srr1 mc_frame[37]
-#define mc_dar mc_frame[38]
-#define mc_dsisr mc_frame[39]
-#define mc_exc mc_frame[40]
+#define mc_exc mc_frame[38]
+#define mc_dar mc_frame[39]
+#define mc_dsisr mc_frame[40]
/* floating-point state */
#define mc_fpscr mc_fpreg[32]
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,41 @@
+/*-
+ * Copyright 2012 Konstantin Belousov <kib at FreeBSD.ORG>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/powerpc/include/vdso.h 237433 2012-06-22 07:06:40Z kib $
+ */
+
+#ifndef _POWERPC_VDSO_H
+#define _POWERPC_VDSO_H
+
+#define VDSO_TIMEHANDS_MD \
+ uint32_t th_res[8];
+
+#ifdef _KERNEL
+#ifdef COMPAT_FREEBSD32
+
+#define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD
+
+#endif
+#endif
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/include/vmparam.h
--- a/head/sys/powerpc/include/vmparam.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/include/vmparam.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $NetBSD: vmparam.h,v 1.11 2000/02/11 19:25:16 thorpej Exp $
- * $FreeBSD: head/sys/powerpc/include/vmparam.h 228413 2011-12-11 17:23:03Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/include/vmparam.h 236141 2012-05-27 10:25:20Z raj $
*/
#ifndef _MACHINE_VMPARAM_H_
@@ -78,7 +78,7 @@
#endif
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#else /* LOCORE */
-#if !defined(__powerpc64__) && defined(E500)
+#if !defined(__powerpc64__) && defined(BOOKE)
#define VM_MIN_ADDRESS 0
#define VM_MAXUSER_ADDRESS 0x7ffff000
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/fsl_sdhc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/mpc85xx/fsl_sdhc.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,1306 @@
+/*-
+ * Copyright (c) 2011-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Driver for Freescale integrated eSDHC controller.
+ * Limitations:
+ * - No support for multi-block transfers.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/fsl_sdhc.c 236121 2012-05-26 21:07:15Z raj $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+#include <machine/vmparam.h>
+
+#include <dev/fdt/fdt_common.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <dev/mmc/bridge.h>
+#include <dev/mmc/mmcreg.h>
+#include <dev/mmc/mmcvar.h>
+#include <dev/mmc/mmcbrvar.h>
+
+#include <powerpc/mpc85xx/mpc85xx.h>
+
+#include "opt_platform.h"
+
+#include "mmcbr_if.h"
+
+#include "fsl_sdhc.h"
+
+#define DEBUG
+#undef DEBUG
+#ifdef DEBUG
+#define DPRINTF(fmt, arg...) printf("DEBUG %s(): " fmt, __FUNCTION__, ##arg)
+#else
+#define DPRINTF(fmt, arg...)
+#endif
+
+
+/*****************************************************************************
+ * Register the driver
+ *****************************************************************************/
+/* Forward declarations */
+static int fsl_sdhc_probe(device_t);
+static int fsl_sdhc_attach(device_t);
+static int fsl_sdhc_detach(device_t);
+
+static int fsl_sdhc_read_ivar(device_t, device_t, int, uintptr_t *);
+static int fsl_sdhc_write_ivar(device_t, device_t, int, uintptr_t);
+
+static int fsl_sdhc_update_ios(device_t, device_t);
+static int fsl_sdhc_request(device_t, device_t, struct mmc_request *);
+static int fsl_sdhc_get_ro(device_t, device_t);
+static int fsl_sdhc_acquire_host(device_t, device_t);
+static int fsl_sdhc_release_host(device_t, device_t);
+
+static device_method_t fsl_sdhc_methods[] = {
+ /* device_if */
+ DEVMETHOD(device_probe, fsl_sdhc_probe),
+ DEVMETHOD(device_attach, fsl_sdhc_attach),
+ DEVMETHOD(device_detach, fsl_sdhc_detach),
+
+ /* Bus interface */
+ DEVMETHOD(bus_read_ivar, fsl_sdhc_read_ivar),
+ DEVMETHOD(bus_write_ivar, fsl_sdhc_write_ivar),
+
+ /* OFW bus interface */
+ DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat),
+ DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model),
+ DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name),
+ DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node),
+ DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type),
+
+ /* mmcbr_if */
+ DEVMETHOD(mmcbr_update_ios, fsl_sdhc_update_ios),
+ DEVMETHOD(mmcbr_request, fsl_sdhc_request),
+ DEVMETHOD(mmcbr_get_ro, fsl_sdhc_get_ro),
+ DEVMETHOD(mmcbr_acquire_host, fsl_sdhc_acquire_host),
+ DEVMETHOD(mmcbr_release_host, fsl_sdhc_release_host),
+
+ {0, 0},
+};
+
+/* kobj_class definition */
+static driver_t fsl_sdhc_driver = {
+ "sdhci",
+ fsl_sdhc_methods,
+ sizeof(struct fsl_sdhc_softc)
+};
+
+static devclass_t fsl_sdhc_devclass;
+
+DRIVER_MODULE(sdhci, simplebus, fsl_sdhc_driver, fsl_sdhc_devclass, 0, 0);
+
+
+/*****************************************************************************
+ * Private methods
+ *****************************************************************************/
+static inline int
+read4(struct fsl_sdhc_softc *sc, unsigned int offset)
+{
+
+ return bus_space_read_4(sc->bst, sc->bsh, offset);
+}
+
+static inline void
+write4(struct fsl_sdhc_softc *sc, unsigned int offset, int value)
+{
+
+ bus_space_write_4(sc->bst, sc->bsh, offset, value);
+}
+
+static inline void
+set_bit(struct fsl_sdhc_softc *sc, uint32_t offset, uint32_t mask)
+{
+ uint32_t x = read4(sc, offset);
+
+ write4(sc, offset, x | mask);
+}
+
+static inline void
+clear_bit(struct fsl_sdhc_softc *sc, uint32_t offset, uint32_t mask)
+{
+ uint32_t x = read4(sc, offset);
+
+ write4(sc, offset, x & ~mask);
+}
+
+static int
+wait_for_bit_clear(struct fsl_sdhc_softc *sc, enum sdhc_reg_off reg,
+ uint32_t bit)
+{
+ uint32_t timeout = 10;
+ uint32_t stat;
+
+ stat = read4(sc, reg);
+ while (stat & bit) {
+ if (timeout == 0) {
+ return (-1);
+ }
+ --timeout;
+ DELAY(1000);
+ stat = read4(sc, reg);
+ }
+
+ return (0);
+}
+
+static int
+wait_for_free_line(struct fsl_sdhc_softc *sc, enum sdhc_line line)
+{
+ uint32_t timeout = 100;
+ uint32_t stat;
+
+ stat = read4(sc, SDHC_PRSSTAT);
+ while (stat & line) {
+ if (timeout == 0) {
+ return (-1);
+ }
+ --timeout;
+ DELAY(1000);
+ stat = read4(sc, SDHC_PRSSTAT);
+ }
+
+ return (0);
+}
+
+static uint32_t
+get_platform_clock(struct fsl_sdhc_softc *sc)
+{
+ device_t self, parent;
+ phandle_t node;
+ uint32_t clock;
+
+ self = sc->self;
+ node = ofw_bus_get_node(self);
+
+ /* Get sdhci node properties */
+ if((OF_getprop(node, "clock-frequency", (void *)&clock,
+ sizeof(clock)) <= 0) || (clock == 0)) {
+
+ /*
+ * Trying to get clock from parent device (soc) if correct
+ * clock cannot be acquired from sdhci node.
+ */
+ parent = device_get_parent(self);
+ node = ofw_bus_get_node(parent);
+
+ /* Get soc properties */
+ if ((OF_getprop(node, "bus-frequency", (void *)&clock,
+ sizeof(clock)) <= 0) || (clock == 0)) {
+ device_printf(self,"Cannot acquire correct sdhci "
+ "frequency from DTS.\n");
+
+ return (0);
+ }
+ }
+
+ DPRINTF("Acquired clock: %d from DTS\n", clock);
+
+ return (clock);
+}
+
+/**
+ * Set clock driving card.
+ * @param sc
+ * @param clock Desired clock frequency in Hz
+ */
+static void
+set_clock(struct fsl_sdhc_softc *sc, uint32_t clock)
+{
+ uint32_t base_clock;
+ uint32_t divisor, prescaler = 1;
+ uint32_t round = 0;
+
+ if (clock == sc->slot.clock)
+ return;
+
+ if (clock == 0) {
+ clear_bit(sc, SDHC_SYSCTL, MASK_CLOCK_CONTROL | SYSCTL_PEREN |
+ SYSCTL_HCKEN | SYSCTL_IPGEN);
+ return;
+ }
+
+ base_clock = sc->platform_clock;
+ round = base_clock & 0x2;
+ base_clock >>= 2;
+ base_clock += round;
+ round = 0;
+
+ /* SD specification 1.1 doesn't allow frequences above 50 MHz */
+ if (clock > FSL_SDHC_MAX_CLOCK)
+ clock = FSL_SDHC_MAX_CLOCK;
+
+ /*
+ * divisor = ceil(base_clock / clock)
+ * TODO: Reconsider symmetric rounding here instead of ceiling.
+ */
+ divisor = (base_clock + clock - 1) / clock;
+
+ while (divisor > 16) {
+ round = divisor & 0x1;
+ divisor >>= 1;
+
+ prescaler <<= 1;
+ }
+ divisor += round - 1;
+
+ /* Turn off the clock. */
+ clear_bit(sc, SDHC_SYSCTL, MASK_CLOCK_CONTROL);
+
+ /* Write clock settings. */
+ set_bit(sc, SDHC_SYSCTL, (prescaler << SHIFT_SDCLKFS) |
+ (divisor << SHIFT_DVS));
+
+ /*
+ * Turn on clocks.
+ * TODO: This actually disables clock automatic gating off feature of
+ * the controller which eventually should be enabled but as for now
+ * it prevents controller from generating card insertion/removal
+ * interrupts correctly.
+ */
+ set_bit(sc, SDHC_SYSCTL, SYSCTL_PEREN | SYSCTL_HCKEN | SYSCTL_IPGEN);
+
+ sc->slot.clock = clock;
+
+ DPRINTF("given clock = %d, computed clock = %d\n", clock,
+ (base_clock / prescaler) / (divisor + 1));
+}
+
+static inline void
+send_80_clock_ticks(struct fsl_sdhc_softc *sc)
+{
+ int err;
+
+ err = wait_for_free_line(sc, SDHC_CMD_LINE | SDHC_DAT_LINE);
+ if (err != 0) {
+ device_printf(sc->self, "Can't acquire data/cmd lines\n");
+ return;
+ }
+
+ set_bit(sc, SDHC_SYSCTL, SYSCTL_INITA);
+ err = wait_for_bit_clear(sc, SDHC_SYSCTL, SYSCTL_INITA);
+ if (err != 0) {
+ device_printf(sc->self, "Can't send 80 clocks to the card.\n");
+ }
+}
+
+static void
+set_bus_width(struct fsl_sdhc_softc *sc, enum mmc_bus_width width)
+{
+
+ DPRINTF("setting bus width to %d\n", width);
+ switch (width) {
+ case bus_width_1:
+ set_bit(sc, SDHC_PROCTL, DTW_1);
+ break;
+ case bus_width_4:
+ set_bit(sc, SDHC_PROCTL, DTW_4);
+ break;
+ case bus_width_8:
+ set_bit(sc, SDHC_PROCTL, DTW_8);
+ break;
+ default:
+ device_printf(sc->self, "Unsupported bus width\n");
+ }
+}
+
+static void
+reset_controller_all(struct fsl_sdhc_softc *sc)
+{
+ uint32_t count = 5;
+
+ set_bit(sc, SDHC_SYSCTL, SYSCTL_RSTA);
+ while (read4(sc, SDHC_SYSCTL) & SYSCTL_RSTA) {
+ DELAY(FSL_SDHC_RESET_DELAY);
+ --count;
+ if (count == 0) {
+ device_printf(sc->self,
+ "Can't reset the controller\n");
+ return;
+ }
+ }
+}
+
+static void
+reset_controller_dat_cmd(struct fsl_sdhc_softc *sc)
+{
+ int err;
+
+ set_bit(sc, SDHC_SYSCTL, SYSCTL_RSTD | SYSCTL_RSTC);
+ err = wait_for_bit_clear(sc, SDHC_SYSCTL, SYSCTL_RSTD | SYSCTL_RSTC);
+ if (err != 0) {
+ device_printf(sc->self, "Can't reset data & command part!\n");
+ return;
+ }
+}
+
+static void
+init_controller(struct fsl_sdhc_softc *sc)
+{
+
+ /* Enable interrupts. */
+#ifdef FSL_SDHC_NO_DMA
+ write4(sc, SDHC_IRQSTATEN, MASK_IRQ_ALL & ~IRQ_DINT & ~IRQ_DMAE);
+ write4(sc, SDHC_IRQSIGEN, MASK_IRQ_ALL & ~IRQ_DINT & ~IRQ_DMAE);
+#else
+ write4(sc, SDHC_IRQSTATEN, MASK_IRQ_ALL & ~IRQ_BRR & ~IRQ_BWR);
+ write4(sc, SDHC_IRQSIGEN, MASK_IRQ_ALL & ~IRQ_BRR & ~IRQ_BWR);
+
+ /* Write DMA address */
+ write4(sc, SDHC_DSADDR, sc->dma_phys);
+
+ /* Enable snooping and fix for AHB2MAG bypass. */
+ write4(sc, SDHC_DCR, DCR_SNOOP | DCR_AHB2MAG_BYPASS);
+#endif
+ /* Set data timeout. */
+ set_bit(sc, SDHC_SYSCTL, 0xe << SHIFT_DTOCV);
+
+ /* Set water-mark levels (FIFO buffer size). */
+ write4(sc, SDHC_WML, (FSL_SDHC_FIFO_BUF_WORDS << 16) |
+ FSL_SDHC_FIFO_BUF_WORDS);
+}
+
+static void
+init_mmc_host_struct(struct fsl_sdhc_softc *sc)
+{
+ struct mmc_host *host = &sc->mmc_host;
+
+ /* Clear host structure. */
+ bzero(host, sizeof(struct mmc_host));
+
+ /* Calculate minimum and maximum operating frequencies. */
+ host->f_min = sc->platform_clock / FSL_SDHC_MAX_DIV;
+ host->f_max = FSL_SDHC_MAX_CLOCK;
+
+ /* Set operation conditions (voltage). */
+ host->host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340;
+
+ /* Set additional host controller capabilities. */
+ host->caps = MMC_CAP_4_BIT_DATA;
+
+ /* Set mode. */
+ host->mode = mode_sd;
+}
+
+static void
+card_detect_task(void *arg, int pending)
+{
+ struct fsl_sdhc_softc *sc = (struct fsl_sdhc_softc *)arg;
+ int err;
+ int insert;
+
+ insert = read4(sc, SDHC_PRSSTAT) & PRSSTAT_CINS;
+
+ mtx_lock(&sc->mtx);
+
+ if (insert) {
+ if (sc->child != NULL) {
+ mtx_unlock(&sc->mtx);
+ return;
+ }
+
+ sc->child = device_add_child(sc->self, "mmc", -1);
+ if (sc->child == NULL) {
+ device_printf(sc->self, "Couldn't add MMC bus!\n");
+ mtx_unlock(&sc->mtx);
+ return;
+ }
+
+ /* Initialize MMC bus host structure. */
+ init_mmc_host_struct(sc);
+ device_set_ivars(sc->child, &sc->mmc_host);
+
+ } else {
+ if (sc->child == NULL) {
+ mtx_unlock(&sc->mtx);
+ return;
+ }
+ }
+
+ mtx_unlock(&sc->mtx);
+
+ if (insert) {
+ if ((err = device_probe_and_attach(sc->child)) != 0) {
+ device_printf(sc->self, "MMC bus failed on probe "
+ "and attach! error %d\n", err);
+ device_delete_child(sc->self, sc->child);
+ sc->child = NULL;
+ }
+ } else {
+ if (device_delete_child(sc->self, sc->child) != 0)
+ device_printf(sc->self, "Could not delete MMC bus!\n");
+ sc->child = NULL;
+ }
+}
+
+static void
+card_detect_delay(void *arg)
+{
+ struct fsl_sdhc_softc *sc = arg;
+
+ taskqueue_enqueue(taskqueue_swi_giant, &sc->card_detect_task);
+}
+
+static void
+finalize_request(struct fsl_sdhc_softc *sc)
+{
+
+ DPRINTF("finishing request %x\n", sc->request);
+
+ sc->request->done(sc->request);
+ sc->request = NULL;
+}
+
+/**
+ * Read response from card.
+ * @todo Implement Auto-CMD responses being held in R3 for multi-block xfers.
+ * @param sc
+ */
+static void
+get_response(struct fsl_sdhc_softc *sc)
+{
+ struct mmc_command *cmd = sc->request->cmd;
+ int i;
+ uint32_t val;
+ uint8_t ext = 0;
+
+ if (cmd->flags & MMC_RSP_136) {
+ /* CRC is stripped, need to shift one byte left. */
+ for (i = 0; i < 4; i++) {
+ val = read4(sc, SDHC_CMDRSP0 + i * 4);
+ cmd->resp[3 - i] = (val << 8) + ext;
+ ext = val >> 24;
+ }
+ } else {
+ cmd->resp[0] = read4(sc, SDHC_CMDRSP0);
+ }
+}
+
+#ifdef FSL_SDHC_NO_DMA
+/**
+ * Read all content of a fifo buffer.
+ * @warning Assumes data buffer is 32-bit aligned.
+ * @param sc
+ */
+static void
+read_block_pio(struct fsl_sdhc_softc *sc)
+{
+ struct mmc_data *data = sc->request->cmd->data;
+ size_t left = min(FSL_SDHC_FIFO_BUF_SIZE, data->len);
+ uint8_t *buf = data->data;
+ uint32_t word;
+
+ buf += sc->data_offset;
+ bus_space_read_multi_4(sc->bst, sc->bsh, SDHC_DATPORT, (uint32_t *)buf,
+ left >> 2);
+
+ sc->data_offset += left;
+
+ /* Handle 32-bit unaligned size case. */
+ left &= 0x3;
+ if (left > 0) {
+ buf = (uint8_t *)data->data + (sc->data_offset & ~0x3);
+ word = read4(sc, SDHC_DATPORT);
+ while (left > 0) {
+ *(buf++) = word;
+ word >>= 8;
+ --left;
+ }
+ }
+}
+
+/**
+ * Write a fifo buffer.
+ * @warning Assumes data buffer size is 32-bit aligned.
+ * @param sc
+ */
+static void
+write_block_pio(struct fsl_sdhc_softc *sc)
+{
+ struct mmc_data *data = sc->request->cmd->data;
+ size_t left = min(FSL_SDHC_FIFO_BUF_SIZE, data->len);
+ uint8_t *buf = data->data;
+ uint32_t word = 0;
+
+ DPRINTF("sc->data_offset %d\n", sc->data_offset);
+
+ buf += sc->data_offset;
+ bus_space_write_multi_4(sc->bst, sc->bsh, SDHC_DATPORT, (uint32_t *)buf,
+ left >> 2);
+
+ sc->data_offset += left;
+
+ /* Handle 32-bit unaligned size case. */
+ left &= 0x3;
+ if (left > 0) {
+ buf = (uint8_t *)data->data + (sc->data_offset & ~0x3);
+ while (left > 0) {
+ word += *(buf++);
+ word <<= 8;
+ --left;
+ }
+ write4(sc, SDHC_DATPORT, word);
+ }
+}
+
+static void
+pio_read_transfer(struct fsl_sdhc_softc *sc)
+{
+
+ while (read4(sc, SDHC_PRSSTAT) & PRSSTAT_BREN) {
+ read_block_pio(sc);
+
+ /*
+ * TODO: should we check here whether data_offset >= data->len?
+ */
+ }
+}
+
+static void
+pio_write_transfer(struct fsl_sdhc_softc *sc)
+{
+
+ while (read4(sc, SDHC_PRSSTAT) & PRSSTAT_BWEN) {
+ write_block_pio(sc);
+
+ /*
+ * TODO: should we check here whether data_offset >= data->len?
+ */
+ }
+}
+#endif /* FSL_SDHC_USE_DMA */
+
+static inline void
+handle_command_intr(struct fsl_sdhc_softc *sc, uint32_t irq_stat)
+{
+ struct mmc_command *cmd = sc->request->cmd;
+
+ /* Handle errors. */
+ if (irq_stat & IRQ_CTOE) {
+ cmd->error = MMC_ERR_TIMEOUT;
+ } else if (irq_stat & IRQ_CCE) {
+ cmd->error = MMC_ERR_BADCRC;
+ } else if (irq_stat & (IRQ_CEBE | IRQ_CIE)) {
+ cmd->error = MMC_ERR_FIFO;
+ }
+
+ if (cmd->error) {
+ device_printf(sc->self, "Error interrupt occured\n");
+ reset_controller_dat_cmd(sc);
+ return;
+ }
+
+ if (sc->command_done)
+ return;
+
+ if (irq_stat & IRQ_CC) {
+ sc->command_done = 1;
+
+ if (cmd->flags & MMC_RSP_PRESENT)
+ get_response(sc);
+ }
+}
+
+static inline void
+handle_data_intr(struct fsl_sdhc_softc *sc, uint32_t irq_stat)
+{
+ struct mmc_command *cmd = sc->request->cmd;
+
+ /* Handle errors. */
+ if (irq_stat & IRQ_DTOE) {
+ cmd->error = MMC_ERR_TIMEOUT;
+ } else if (irq_stat & (IRQ_DCE | IRQ_DEBE)) {
+ cmd->error = MMC_ERR_BADCRC;
+ } else if (irq_stat & IRQ_ERROR_DATA_MASK) {
+ cmd->error = MMC_ERR_FAILED;
+ }
+
+ if (cmd->error) {
+ device_printf(sc->self, "Error interrupt occured\n");
+ sc->data_done = 1;
+ reset_controller_dat_cmd(sc);
+ return;
+ }
+
+ if (sc->data_done)
+ return;
+
+#ifdef FSL_SDHC_NO_DMA
+ if (irq_stat & IRQ_BRR) {
+ pio_read_transfer(sc);
+ }
+
+ if (irq_stat & IRQ_BWR) {
+ pio_write_transfer(sc);
+ }
+#else
+ if (irq_stat & IRQ_DINT) {
+ struct mmc_data *data = sc->request->cmd->data;
+
+ /* Synchronize DMA. */
+ if (data->flags & MMC_DATA_READ) {
+ bus_dmamap_sync(sc->dma_tag, sc->dma_map,
+ BUS_DMASYNC_POSTREAD);
+ memcpy(data->data, sc->dma_mem, data->len);
+ } else {
+ bus_dmamap_sync(sc->dma_tag, sc->dma_map,
+ BUS_DMASYNC_POSTWRITE);
+ }
+
+ /*
+ * TODO: For multiple block transfers, address of dma memory
+ * in DSADDR register should be set to the beginning of the
+ * segment here. Also offset to data pointer should be handled.
+ */
+ }
+#endif
+
+ if (irq_stat & IRQ_TC)
+ sc->data_done = 1;
+}
+
+static void
+interrupt_handler(void *arg)
+{
+ struct fsl_sdhc_softc *sc = (struct fsl_sdhc_softc *)arg;
+ uint32_t irq_stat;
+
+ mtx_lock(&sc->mtx);
+
+ irq_stat = read4(sc, SDHC_IRQSTAT);
+
+ /* Card interrupt. */
+ if (irq_stat & IRQ_CINT) {
+ DPRINTF("Card interrupt recievied\n");
+
+ }
+
+ /* Card insertion interrupt. */
+ if (irq_stat & IRQ_CINS) {
+ clear_bit(sc, SDHC_IRQSIGEN, IRQ_CINS);
+ clear_bit(sc, SDHC_IRQSTATEN, IRQ_CINS);
+ set_bit(sc, SDHC_IRQSIGEN, IRQ_CRM);
+ set_bit(sc, SDHC_IRQSTATEN, IRQ_CRM);
+
+ callout_reset(&sc->card_detect_callout, hz / 2,
+ card_detect_delay, sc);
+ }
+
+ /* Card removal interrupt. */
+ if (irq_stat & IRQ_CRM) {
+ clear_bit(sc, SDHC_IRQSIGEN, IRQ_CRM);
+ clear_bit(sc, SDHC_IRQSTATEN, IRQ_CRM);
+ set_bit(sc, SDHC_IRQSIGEN, IRQ_CINS);
+ set_bit(sc, SDHC_IRQSTATEN, IRQ_CINS);
+
+ callout_stop(&sc->card_detect_callout);
+ taskqueue_enqueue(taskqueue_swi_giant, &sc->card_detect_task);
+ }
+
+ /* Handle request interrupts. */
+ if (sc->request) {
+ handle_command_intr(sc, irq_stat);
+ handle_data_intr(sc, irq_stat);
+
+ /*
+ * Finalize request when transfer is done successfully
+ * or was interrupted due to error.
+ */
+ if ((sc->data_done && sc->command_done) ||
+ (sc->request->cmd->error))
+ finalize_request(sc);
+ }
+
+ /* Clear status register. */
+ write4(sc, SDHC_IRQSTAT, irq_stat);
+
+ mtx_unlock(&sc->mtx);
+}
+
+#ifndef FSL_SDHC_NO_DMA
+static void
+dma_get_phys_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+
+ if (error != 0)
+ return;
+
+ /* Get first segment's physical address. */
+ *(bus_addr_t *)arg = segs->ds_addr;
+}
+
+static int
+init_dma(struct fsl_sdhc_softc *sc)
+{
+ device_t self = sc->self;
+ int err;
+
+ err = bus_dma_tag_create(bus_get_dma_tag(self),
+ FSL_SDHC_DMA_BLOCK_SIZE, 0, BUS_SPACE_MAXADDR_32BIT,
+ BUS_SPACE_MAXADDR, NULL, NULL, FSL_SDHC_DMA_BLOCK_SIZE, 1,
+ FSL_SDHC_DMA_BLOCK_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL,
+ &sc->dma_tag);
+
+ if (err) {
+ device_printf(self, "Could not create DMA tag!\n");
+ return (-1);
+ }
+
+ err = bus_dmamem_alloc(sc->dma_tag, (void **)&(sc->dma_mem),
+ BUS_DMA_NOWAIT | BUS_DMA_NOCACHE, &sc->dma_map);
+ if (err) {
+ device_printf(self, "Could not allocate DMA memory!\n");
+ goto fail1;
+ }
+
+ err = bus_dmamap_load(sc->dma_tag, sc->dma_map, (void *)sc->dma_mem,
+ FSL_SDHC_DMA_BLOCK_SIZE, dma_get_phys_addr, &sc->dma_phys, 0);
+ if (err) {
+ device_printf(self, "Could not load DMA map!\n");
+ goto fail2;
+ }
+
+ return (0);
+
+fail2:
+ bus_dmamem_free(sc->dma_tag, sc->dma_mem, sc->dma_map);
+fail1:
+ bus_dma_tag_destroy(sc->dma_tag);
+
+ return (-1);
+}
+#endif /* FSL_SDHC_NO_DMA */
+
+static uint32_t
+set_xfertyp_register(const struct mmc_command *cmd)
+{
+ uint32_t xfertyp = 0;
+
+ /* Set command index. */
+ xfertyp |= cmd->opcode << CMDINX_SHIFT;
+
+ /* Set command type. */
+ if (cmd->opcode == MMC_STOP_TRANSMISSION)
+ xfertyp |= CMDTYP_ABORT;
+
+ /* Set data preset select. */
+ if (cmd->data) {
+ xfertyp |= XFERTYP_DPSEL;
+
+ /* Set transfer direction. */
+ if (cmd->data->flags & MMC_DATA_READ)
+ xfertyp |= XFERTYP_DTDSEL;
+ }
+
+ /* Set command index check. */
+ if (cmd->flags & MMC_RSP_OPCODE)
+ xfertyp |= XFERTYP_CICEN;
+
+ /* Set command CRC check. */
+ if (cmd->flags & MMC_RSP_CRC)
+ xfertyp |= XFERTYP_CCCEN;
+
+ /* Set response type */
+ if (!(cmd->flags & MMC_RSP_PRESENT))
+ xfertyp |= RSPTYP_NONE;
+ else if (cmd->flags & MMC_RSP_136)
+ xfertyp |= RSPTYP_136;
+ else if (cmd->flags & MMC_RSP_BUSY)
+ xfertyp |= RSPTYP_48_BUSY;
+ else
+ xfertyp |= RSPTYP_48;
+
+#ifndef FSL_SDHC_NO_DMA
+ /* Enable DMA */
+ xfertyp |= XFERTYP_DMAEN;
+#endif
+
+ return (xfertyp);
+}
+
+static uint32_t
+set_blkattr_register(const struct mmc_data *data)
+{
+
+ if (data->len <= FSL_SDHC_MAX_BLOCK_SIZE) {
+ /* One block transfer. */
+ return (BLKATTR_BLOCK_COUNT(1) | ((data->len) &
+ BLKATTR_BLKSZE));
+ }
+
+ /* TODO: Write code here for multi-block transfers. */
+ return (0);
+}
+
+/**
+ * Initiate data transfer. Interrupt handler will finalize it.
+ * @todo Implement multi-block transfers.
+ * @param sc
+ * @param cmd
+ */
+static int
+start_data(struct fsl_sdhc_softc *sc, struct mmc_data *data)
+{
+ uint32_t reg;
+
+ if ((uint32_t)data->data & 0x3) {
+ device_printf(sc->self, "32-bit unaligned data pointer in "
+ "request\n");
+ return (-1);
+ }
+
+ sc->data_done = 0;
+
+#ifdef FSL_SDHC_NO_DMA
+ sc->data_ptr = data->data;
+ sc->data_offset = 0;
+#else
+ /* Write DMA address register. */
+ write4(sc, SDHC_DSADDR, sc->dma_phys);
+
+ /* Synchronize DMA. */
+ if (data->flags & MMC_DATA_READ) {
+ bus_dmamap_sync(sc->dma_tag, sc->dma_map,
+ BUS_DMASYNC_PREREAD);
+ } else {
+ memcpy(sc->dma_mem, data->data, data->len);
+ bus_dmamap_sync(sc->dma_tag, sc->dma_map,
+ BUS_DMASYNC_PREWRITE);
+ }
+#endif
+ /* Set block size and count. */
+ reg = set_blkattr_register(data);
+ if (reg == 0) {
+ device_printf(sc->self, "Requested unsupported multi-block "
+ "transfer.\n");
+ return (-1);
+ }
+ write4(sc, SDHC_BLKATTR, reg);
+
+ return (0);
+}
+
+static int
+start_command(struct fsl_sdhc_softc *sc, struct mmc_command *cmd)
+{
+ struct mmc_request *req = sc->request;
+ uint32_t mask;
+ uint32_t xfertyp;
+ int err;
+
+ DPRINTF("opcode %d, flags 0x%08x\n", cmd->opcode, cmd->flags);
+ DPRINTF("PRSSTAT = 0x%08x\n", read4(sc, SDHC_PRSSTAT));
+
+ sc->command_done = 0;
+
+ cmd->error = MMC_ERR_NONE;
+
+ /* TODO: should we check here for card presence and clock settings? */
+
+ /* Always wait for free CMD line. */
+ mask = SDHC_CMD_LINE;
+ /* Wait for free DAT if we have data or busy signal. */
+ if (cmd->data || (cmd->flags & MMC_RSP_BUSY))
+ mask |= SDHC_DAT_LINE;
+ /* We shouldn't wait for DAT for stop commands. */
+ if (cmd == req->stop)
+ mask &= ~SDHC_DAT_LINE;
+ err = wait_for_free_line(sc, mask);
+ if (err != 0) {
+ device_printf(sc->self, "Controller never released inhibit "
+ "bit(s).\n");
+ reset_controller_dat_cmd(sc);
+ cmd->error = MMC_ERR_FAILED;
+ sc->request = NULL;
+ req->done(req);
+ return (-1);
+ }
+
+ xfertyp = set_xfertyp_register(cmd);
+
+ if (cmd->data != NULL) {
+ err = start_data(sc, cmd->data);
+ if (err != 0) {
+ device_printf(sc->self,
+ "Data transfer request failed\n");
+ reset_controller_dat_cmd(sc);
+ cmd->error = MMC_ERR_FAILED;
+ sc->request = NULL;
+ req->done(req);
+ return (-1);
+ }
+ }
+
+ write4(sc, SDHC_CMDARG, cmd->arg);
+ write4(sc, SDHC_XFERTYP, xfertyp);
+
+ DPRINTF("XFERTYP = 0x%08x\n", xfertyp);
+ DPRINTF("CMDARG = 0x%08x\n", cmd->arg);
+
+ return (0);
+}
+
+#ifdef DEBUG
+static void
+dump_registers(struct fsl_sdhc_softc *sc)
+{
+ printf("PRSSTAT = 0x%08x\n", read4(sc, SDHC_PRSSTAT));
+ printf("PROCTL = 0x%08x\n", read4(sc, SDHC_PROCTL));
+ printf("PMUXCR = 0x%08x\n", ccsr_read4(OCP85XX_PMUXCR));
+ printf("HOSTCAPBLT = 0x%08x\n", read4(sc, SDHC_HOSTCAPBLT));
+ printf("IRQSTAT = 0x%08x\n", read4(sc, SDHC_IRQSTAT));
+ printf("IRQSTATEN = 0x%08x\n", read4(sc, SDHC_IRQSTATEN));
+ printf("IRQSIGEN = 0x%08x\n", read4(sc, SDHC_IRQSIGEN));
+ printf("WML = 0x%08x\n", read4(sc, SDHC_WML));
+ printf("DSADDR = 0x%08x\n", read4(sc, SDHC_DSADDR));
+ printf("XFERTYP = 0x%08x\n", read4(sc, SDHC_XFERTYP));
+ printf("ECMCR = 0x%08x\n", ccsr_read4(OCP85XX_ECMCR));
+ printf("DCR = 0x%08x\n", read4(sc, SDHC_DCR));
+}
+#endif
+
+/*****************************************************************************
+ * Public methods
+ *****************************************************************************/
+/*
+ * Device interface methods.
+ */
+static int
+fsl_sdhc_probe(device_t self)
+{
+ static const char *desc =
+ "Freescale Enhanced Secure Digital Host Controller";
+
+ if (!ofw_bus_is_compatible(self, "fsl,p2020-esdhc") &&
+ !ofw_bus_is_compatible(self, "fsl,esdhc"))
+ return (ENXIO);
+
+ device_set_desc(self, desc);
+
+ return (BUS_PROBE_VENDOR);
+}
+
+static int
+fsl_sdhc_attach(device_t self)
+{
+ struct fsl_sdhc_softc *sc;
+
+ sc = device_get_softc(self);
+
+ sc->self = self;
+
+ mtx_init(&sc->mtx, device_get_nameunit(self), NULL, MTX_DEF);
+
+ /* Setup memory resource */
+ sc->mem_rid = 0;
+ sc->mem_resource = bus_alloc_resource_any(self, SYS_RES_MEMORY,
+ &sc->mem_rid, RF_ACTIVE);
+ if (sc->mem_resource == NULL) {
+ device_printf(self, "Could not allocate memory.\n");
+ goto fail;
+ }
+ sc->bst = rman_get_bustag(sc->mem_resource);
+ sc->bsh = rman_get_bushandle(sc->mem_resource);
+
+ /* Setup interrupt resource. */
+ sc->irq_rid = 0;
+ sc->irq_resource = bus_alloc_resource_any(self, SYS_RES_IRQ,
+ &sc->irq_rid, RF_ACTIVE);
+ if (sc->irq_resource == NULL) {
+ device_printf(self, "Could not allocate interrupt.\n");
+ goto fail;
+ }
+ if (bus_setup_intr(self, sc->irq_resource, INTR_TYPE_MISC |
+ INTR_MPSAFE, NULL, interrupt_handler, sc, &sc->ihl) != 0) {
+ device_printf(self, "Could not setup interrupt.\n");
+ goto fail;
+ }
+
+ /* Setup DMA. */
+#ifndef FSL_SDHC_NO_DMA
+ if (init_dma(sc) != 0) {
+ device_printf(self, "Could not setup DMA\n");
+ }
+#endif
+ sc->bus_busy = 0;
+ sc->platform_clock = get_platform_clock(sc);
+ if (sc->platform_clock == 0) {
+ device_printf(self, "Could not get platform clock.\n");
+ goto fail;
+ }
+ sc->command_done = 1;
+ sc->data_done = 1;
+
+ /* Init card detection task. */
+ TASK_INIT(&sc->card_detect_task, 0, card_detect_task, sc);
+ callout_init(&sc->card_detect_callout, 1);
+
+ reset_controller_all(sc);
+ init_controller(sc);
+ set_clock(sc, 400000);
+ send_80_clock_ticks(sc);
+
+#ifdef DEBUG
+ dump_registers(sc);
+#endif
+
+ return (0);
+
+fail:
+ fsl_sdhc_detach(self);
+ return (ENXIO);
+}
+
+static int
+fsl_sdhc_detach(device_t self)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+ int err;
+
+ if (sc->child)
+ device_delete_child(self, sc->child);
+
+ taskqueue_drain(taskqueue_swi_giant, &sc->card_detect_task);
+
+#ifndef FSL_SDHC_NO_DMA
+ bus_dmamap_unload(sc->dma_tag, sc->dma_map);
+ bus_dmamem_free(sc->dma_tag, sc->dma_mem, sc->dma_map);
+ bus_dma_tag_destroy(sc->dma_tag);
+#endif
+
+ if (sc->ihl != NULL) {
+ err = bus_teardown_intr(self, sc->irq_resource, sc->ihl);
+ if (err)
+ return (err);
+ }
+ if (sc->irq_resource != NULL) {
+ err = bus_release_resource(self, SYS_RES_IRQ, sc->irq_rid,
+ sc->irq_resource);
+ if (err)
+ return (err);
+
+ }
+ if (sc->mem_resource != NULL) {
+ err = bus_release_resource(self, SYS_RES_MEMORY, sc->mem_rid,
+ sc->mem_resource);
+ if (err)
+ return (err);
+ }
+
+ mtx_destroy(&sc->mtx);
+
+ return (0);
+}
+
+
+/*
+ * Bus interface methods.
+ */
+static int
+fsl_sdhc_read_ivar(device_t self, device_t child, int index,
+ uintptr_t *result)
+{
+ struct mmc_host *host = device_get_ivars(child);
+
+ switch (index) {
+ case MMCBR_IVAR_BUS_MODE:
+ *(int *)result = host->ios.bus_mode;
+ break;
+ case MMCBR_IVAR_BUS_WIDTH:
+ *(int *)result = host->ios.bus_width;
+ break;
+ case MMCBR_IVAR_CHIP_SELECT:
+ *(int *)result = host->ios.chip_select;
+ break;
+ case MMCBR_IVAR_CLOCK:
+ *(int *)result = host->ios.clock;
+ break;
+ case MMCBR_IVAR_F_MIN:
+ *(int *)result = host->f_min;
+ break;
+ case MMCBR_IVAR_F_MAX:
+ *(int *)result = host->f_max;
+ break;
+ case MMCBR_IVAR_HOST_OCR:
+ *(int *)result = host->host_ocr;
+ break;
+ case MMCBR_IVAR_MODE:
+ *(int *)result = host->mode;
+ break;
+ case MMCBR_IVAR_OCR:
+ *(int *)result = host->ocr;
+ break;
+ case MMCBR_IVAR_POWER_MODE:
+ *(int *)result = host->ios.power_mode;
+ break;
+ case MMCBR_IVAR_VDD:
+ *(int *)result = host->ios.vdd;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static int
+fsl_sdhc_write_ivar(device_t self, device_t child, int index,
+ uintptr_t value)
+{
+ struct mmc_host *host = device_get_ivars(child);
+
+ switch (index) {
+ case MMCBR_IVAR_BUS_MODE:
+ host->ios.bus_mode = value;
+ break;
+ case MMCBR_IVAR_BUS_WIDTH:
+ host->ios.bus_width = value;
+ break;
+ case MMCBR_IVAR_CHIP_SELECT:
+ host->ios.chip_select = value;
+ break;
+ case MMCBR_IVAR_CLOCK:
+ host->ios.clock = value;
+ break;
+ case MMCBR_IVAR_MODE:
+ host->mode = value;
+ break;
+ case MMCBR_IVAR_OCR:
+ host->ocr = value;
+ break;
+ case MMCBR_IVAR_POWER_MODE:
+ host->ios.power_mode = value;
+ break;
+ case MMCBR_IVAR_VDD:
+ host->ios.vdd = value;
+ break;
+ case MMCBR_IVAR_HOST_OCR:
+ case MMCBR_IVAR_F_MIN:
+ case MMCBR_IVAR_F_MAX:
+ default:
+ /* Instance variable not writable. */
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+
+/*
+ * MMC bridge methods.
+ */
+static int
+fsl_sdhc_update_ios(device_t self, device_t reqdev)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+ struct mmc_host *host = device_get_ivars(reqdev);
+ struct mmc_ios *ios = &host->ios;
+
+ mtx_lock(&sc->mtx);
+
+ /* Full reset on bus power down to clear from any state. */
+ if (ios->power_mode == power_off) {
+ reset_controller_all(sc);
+ init_controller(sc);
+ }
+
+ set_clock(sc, ios->clock);
+ set_bus_width(sc, ios->bus_width);
+
+ mtx_unlock(&sc->mtx);
+
+ return (0);
+}
+
+static int
+fsl_sdhc_request(device_t self, device_t reqdev, struct mmc_request *req)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+ int err;
+
+ mtx_lock(&sc->mtx);
+
+ sc->request = req;
+ err = start_command(sc, req->cmd);
+
+ mtx_unlock(&sc->mtx);
+
+ return (err);
+}
+
+static int
+fsl_sdhc_get_ro(device_t self, device_t reqdev)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+
+ /* Wouldn't it be faster using branching (if {}) ?? */
+ return (((read4(sc, SDHC_PRSSTAT) & PRSSTAT_WPSPL) >> 19) ^ 0x1);
+}
+
+static int
+fsl_sdhc_acquire_host(device_t self, device_t reqdev)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+ int retval = 0;
+
+ mtx_lock(&sc->mtx);
+
+ while (sc->bus_busy)
+ retval = mtx_sleep(sc, &sc->mtx, PZERO, "sdhcah", 0);
+ ++(sc->bus_busy);
+
+ mtx_unlock(&sc->mtx);
+
+ return (retval);
+}
+
+static int
+fsl_sdhc_release_host(device_t self, device_t reqdev)
+{
+ struct fsl_sdhc_softc *sc = device_get_softc(self);
+
+ mtx_lock(&sc->mtx);
+ --(sc->bus_busy);
+ mtx_unlock(&sc->mtx);
+ wakeup(sc);
+
+ return (0);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/fsl_sdhc.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/mpc85xx/fsl_sdhc.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,297 @@
+/*-
+ * Copyright (c) 2011-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/powerpc/mpc85xx/fsl_sdhc.h 236121 2012-05-26 21:07:15Z raj $
+ */
+
+#ifndef FSL_SDHC_H_
+#define FSL_SDHC_H_
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include <dev/mmc/bridge.h>
+#include <dev/mmc/mmcreg.h>
+#include <dev/mmc/mmcvar.h>
+#include <dev/mmc/mmcbrvar.h>
+
+#include "mmcbr_if.h"
+
+
+/*****************************************************************************
+ * Private defines
+ *****************************************************************************/
+/* Per-slot state; only the programmed SD clock rate is tracked. */
+struct slot {
+	uint32_t clock;
+};
+
+/* Software context of one Freescale eSDHC controller instance. */
+struct fsl_sdhc_softc {
+	device_t self;			/* this controller device */
+	device_t child;			/* attached mmc bus child */
+
+	bus_space_handle_t bsh;		/* register window handle */
+	bus_space_tag_t bst;		/* register window tag */
+
+	struct resource *mem_resource;	/* register memory resource */
+	int mem_rid;
+	struct resource *irq_resource;	/* interrupt resource */
+	int irq_rid;
+	void *ihl;			/* interrupt handler cookie */
+
+	/* DMA resources for data transfers. */
+	bus_dma_tag_t dma_tag;
+	bus_dmamap_t dma_map;
+	uint32_t* dma_mem;		/* KVA of the DMA buffer */
+	bus_addr_t dma_phys;		/* bus address of the DMA buffer */
+
+	struct mtx mtx;			/* protects softc state */
+
+	/* Deferred card insert/remove detection. */
+	struct task card_detect_task;
+	struct callout card_detect_callout;
+
+	struct mmc_host mmc_host;	/* host caps shared with mmc layer */
+
+	struct slot slot;
+	uint32_t bus_busy;		/* non-zero while a client owns the bus */
+	uint32_t platform_clock;	/* input clock in Hz (assumed; verify) */
+
+	/* Bookkeeping for the request in flight. */
+	struct mmc_request *request;	/* current request, if any */
+	int data_done;
+	int command_done;
+	int use_dma;			/* non-zero: DMA transfer, else PIO */
+	uint32_t* data_ptr;		/* PIO cursor into the data buffer */
+	uint32_t data_offset;
+};
+
+#define FSL_SDHC_RESET_DELAY 50
+
+#define FSL_SDHC_BASE_CLOCK_DIV (2)
+#define FSL_SDHC_MAX_DIV (FSL_SDHC_BASE_CLOCK_DIV * 256 * 16)
+#define FSL_SDHC_MIN_DIV (FSL_SDHC_BASE_CLOCK_DIV * 2)
+#define FSL_SDHC_MAX_CLOCK (50000000)
+
+#define FSL_SDHC_MAX_BLOCK_COUNT (65535)
+#define FSL_SDHC_MAX_BLOCK_SIZE (4096)
+
+#define FSL_SDHC_FIFO_BUF_SIZE (64) /* Water-mark level. */
+#define FSL_SDHC_FIFO_BUF_WORDS (FSL_SDHC_FIFO_BUF_SIZE / 4)
+
+#define FSL_SDHC_DMA_SEGMENT_SIZE (1024)
+#define FSL_SDHC_DMA_ALIGNMENT (4)
+#define FSL_SDHC_DMA_BLOCK_SIZE FSL_SDHC_MAX_BLOCK_SIZE
+
+
+/*
+ * Offsets of SD HC registers
+ */
+enum sdhc_reg_off {
+ SDHC_DSADDR = 0x000,
+ SDHC_BLKATTR = 0x004,
+ SDHC_CMDARG = 0x008,
+ SDHC_XFERTYP = 0x00c,
+ SDHC_CMDRSP0 = 0x010,
+ SDHC_CMDRSP1 = 0x014,
+ SDHC_CMDRSP2 = 0x018,
+ SDHC_CMDRSP3 = 0x01c,
+ SDHC_DATPORT = 0x020,
+ SDHC_PRSSTAT = 0x024,
+ SDHC_PROCTL = 0x028,
+ SDHC_SYSCTL = 0x02c,
+ SDHC_IRQSTAT = 0x030,
+ SDHC_IRQSTATEN = 0x034,
+ SDHC_IRQSIGEN = 0x038,
+ SDHC_AUTOC12ERR = 0x03c,
+ SDHC_HOSTCAPBLT = 0x040,
+ SDHC_WML = 0x044,
+ SDHC_FEVT = 0x050,
+ SDHC_HOSTVER = 0x0fc,
+ SDHC_DCR = 0x40c
+};
+
+enum sysctl_bit {
+ SYSCTL_INITA = 0x08000000,
+ SYSCTL_RSTD = 0x04000000,
+ SYSCTL_RSTC = 0x02000000,
+ SYSCTL_RSTA = 0x01000000,
+ SYSCTL_DTOCV = 0x000f0000,
+ SYSCTL_SDCLKFS = 0x0000ff00,
+ SYSCTL_DVS = 0x000000f0,
+ SYSCTL_PEREN = 0x00000004,
+ SYSCTL_HCKEN = 0x00000002,
+ SYSCTL_IPGEN = 0x00000001
+};
+
+/*
+ * Convert a hex-digit (nibble) position into a bit shift count.
+ * The argument is parenthesized so expansions like
+ * HEX_LEFT_SHIFT(a + b) evaluate correctly (CERT PRE01-C).
+ */
+#define HEX_LEFT_SHIFT(x) (4 * (x))
+enum sysctl_shift {
+	SHIFT_DTOCV = HEX_LEFT_SHIFT(4),
+	SHIFT_SDCLKFS = HEX_LEFT_SHIFT(2),
+	SHIFT_DVS = HEX_LEFT_SHIFT(1)
+};
+
+enum proctl_bit {
+ PROCTL_WECRM = 0x04000000,
+ PROCTL_WECINS = 0x02000000,
+ PROCTL_WECINT = 0x01000000,
+ PROCTL_RWCTL = 0x00040000,
+ PROCTL_CREQ = 0x00020000,
+ PROCTL_SABGREQ = 0x00010000,
+ PROCTL_CDSS = 0x00000080,
+ PROCTL_CDTL = 0x00000040,
+ PROCTL_EMODE = 0x00000030,
+ PROCTL_D3CD = 0x00000008,
+ PROCTL_DTW = 0x00000006
+};
+
+enum dtw {
+ DTW_1 = 0x00000000,
+ DTW_4 = 0x00000002,
+ DTW_8 = 0x00000004
+};
+
+enum prsstat_bit {
+ PRSSTAT_DLSL = 0xff000000,
+ PRSSTAT_CLSL = 0x00800000,
+ PRSSTAT_WPSPL = 0x00080000,
+ PRSSTAT_CDPL = 0x00040000,
+ PRSSTAT_CINS = 0x00010000,
+ PRSSTAT_BREN = 0x00000800,
+ PRSSTAT_BWEN = 0x00000400,
+ PRSSTAT_RTA = 0x00000200,
+ PRSSTAT_WTA = 0x00000100,
+ PRSSTAT_SDOFF = 0x00000080,
+ PRSSTAT_PEROFF = 0x00000040,
+ PRSSTAT_HCKOFF = 0x00000020,
+ PRSSTAT_IPGOFF = 0x00000010,
+ PRSSTAT_DLA = 0x00000004,
+ PRSSTAT_CDIHB = 0x00000002,
+ PRSSTAT_CIHB = 0x00000001
+
+};
+
+enum irq_bits {
+ IRQ_DMAE = 0x10000000,
+ IRQ_AC12E = 0x01000000,
+ IRQ_DEBE = 0x00400000,
+ IRQ_DCE = 0x00200000,
+ IRQ_DTOE = 0x00100000,
+ IRQ_CIE = 0x00080000,
+ IRQ_CEBE = 0x00040000,
+ IRQ_CCE = 0x00020000,
+ IRQ_CTOE = 0x00010000,
+ IRQ_CINT = 0x00000100,
+ IRQ_CRM = 0x00000080,
+ IRQ_CINS = 0x00000040,
+ IRQ_BRR = 0x00000020,
+ IRQ_BWR = 0x00000010,
+ IRQ_DINT = 0x00000008,
+ IRQ_BGE = 0x00000004,
+ IRQ_TC = 0x00000002,
+ IRQ_CC = 0x00000001
+};
+
+enum irq_masks {
+ IRQ_ERROR_DATA_MASK = IRQ_DMAE | IRQ_DEBE | IRQ_DCE | IRQ_DTOE,
+ IRQ_ERROR_CMD_MASK = IRQ_AC12E | IRQ_CIE | IRQ_CTOE | IRQ_CCE |
+ IRQ_CEBE
+};
+
+enum dcr_bits {
+ DCR_PRI = 0x0000c000,
+ DCR_SNOOP = 0x00000040,
+ DCR_AHB2MAG_BYPASS = 0x00000020,
+ DCR_RD_SAFE = 0x00000004,
+ DCR_RD_PFE = 0x00000002,
+ DCR_RD_PF_SIZE = 0x00000001
+};
+
+#define DCR_PRI_SHIFT (14)
+
+enum xfertyp_bits {
+ XFERTYP_CMDINX = 0x3f000000,
+ XFERTYP_CMDTYP = 0x00c00000,
+ XFERTYP_DPSEL = 0x00200000,
+ XFERTYP_CICEN = 0x00100000,
+ XFERTYP_CCCEN = 0x00080000,
+ XFERTYP_RSPTYP = 0x00030000,
+ XFERTYP_MSBSEL = 0x00000020,
+ XFERTYP_DTDSEL = 0x00000010,
+ XFERTYP_AC12EN = 0x00000004,
+ XFERTYP_BCEN = 0x00000002,
+ XFERTYP_DMAEN = 0x00000001
+};
+
+#define CMDINX_SHIFT (24)
+
+enum xfertyp_cmdtyp {
+ CMDTYP_NORMAL = 0x00000000,
+ CMDYTP_SUSPEND = 0x00400000,
+ CMDTYP_RESUME = 0x00800000,
+ CMDTYP_ABORT = 0x00c00000
+};
+
+enum xfertyp_rsptyp {
+ RSPTYP_NONE = 0x00000000,
+ RSPTYP_136 = 0x00010000,
+ RSPTYP_48 = 0x00020000,
+ RSPTYP_48_BUSY = 0x00030000
+};
+
+enum blkattr_bits {
+	BLKATTR_BLKSZE = 0x00001fff,
+	BLKATTR_BLKCNT = 0xffff0000
+};
+/*
+ * Position a block count in the BLKATTR BLKCNT field.  The argument is
+ * parenthesized so expansions like BLKATTR_BLOCK_COUNT(a + b) shift the
+ * whole expression (CERT PRE01-C).
+ */
+#define BLKATTR_BLOCK_COUNT(x) ((x) << 16)
+
+enum wml_bits {
+ WR_WML = 0x00ff0000,
+ RD_WML = 0x000000ff,
+};
+
+enum sdhc_bit_mask {
+ MASK_CLOCK_CONTROL = 0x0000ffff,
+ MASK_IRQ_ALL = IRQ_DMAE | IRQ_AC12E | IRQ_DEBE | IRQ_DCE |
+ IRQ_DTOE | IRQ_CIE | IRQ_CEBE | IRQ_CCE |
+ IRQ_CTOE | IRQ_CINT | IRQ_CRM | IRQ_CINS |
+ IRQ_BRR | IRQ_BWR | IRQ_DINT | IRQ_BGE |
+ IRQ_TC | IRQ_CC,
+};
+
+enum sdhc_line {
+ SDHC_DAT_LINE = 0x2,
+ SDHC_CMD_LINE = 0x1
+};
+
+#endif /* FSL_SDHC_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/i2c.c
--- a/head/sys/powerpc/mpc85xx/i2c.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/i2c.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/i2c.c 235935 2012-05-24 21:09:38Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -72,9 +72,6 @@
#define I2C_BAUD_RATE_DEF 0x3F
#define I2C_DFSSR_DIV 0x10
-#define DEBUG
-#undef DEBUG
-
#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__); printf(fmt,##args); } while (0)
#else
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/lbc.c
--- a/head/sys/powerpc/mpc85xx/lbc.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/lbc.c Wed Jul 25 16:40:53 2012 +0300
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/lbc.c 238045 2012-07-03 00:06:14Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -56,9 +56,6 @@
#include "ofw_bus_if.h"
#include "lbc.h"
-#define DEBUG
-#undef DEBUG
-
#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__); \
printf(fmt,##args); } while (0)
@@ -66,20 +63,6 @@
#define debugf(fmt, args...)
#endif
-static __inline void
-lbc_write_reg(struct lbc_softc *sc, bus_size_t off, uint32_t val)
-{
-
- bus_space_write_4(sc->sc_bst, sc->sc_bsh, off, val);
-}
-
-static __inline uint32_t
-lbc_read_reg(struct lbc_softc *sc, bus_size_t off)
-{
-
- return (bus_space_read_4(sc->sc_bst, sc->sc_bsh, off));
-}
-
static MALLOC_DEFINE(M_LBC, "localbus", "localbus devices information");
static int lbc_probe(device_t);
@@ -161,46 +144,123 @@
static void
lbc_banks_unmap(struct lbc_softc *sc)
{
- int i;
+ int r;
- for (i = 0; i < LBC_DEV_MAX; i++) {
- if (sc->sc_banks[i].size == 0)
- continue;
+ r = 0;
+ while (r < LBC_DEV_MAX) {
+ if (sc->sc_range[r].size == 0)
+ return;
- law_disable(OCP85XX_TGTIF_LBC, sc->sc_banks[i].pa,
- sc->sc_banks[i].size);
- pmap_unmapdev(sc->sc_banks[i].va, sc->sc_banks[i].size);
+ pmap_unmapdev(sc->sc_range[r].kva, sc->sc_range[r].size);
+ law_disable(OCP85XX_TGTIF_LBC, sc->sc_range[r].addr,
+ sc->sc_range[r].size);
+ r++;
}
}
static int
lbc_banks_map(struct lbc_softc *sc)
{
- u_long start, size;
- int error, i;
+ vm_paddr_t end, start;
+ vm_size_t size;
+ u_int i, r, ranges, s;
+ int error;
+ bzero(sc->sc_range, sizeof(sc->sc_range));
+
+ /*
+ * Determine number of discontiguous address ranges to program.
+ */
+ ranges = 0;
for (i = 0; i < LBC_DEV_MAX; i++) {
- if (sc->sc_banks[i].size == 0)
+ size = sc->sc_banks[i].size;
+ if (size == 0)
continue;
- /* Physical address start/size. */
- start = sc->sc_banks[i].pa;
- size = sc->sc_banks[i].size;
+ start = sc->sc_banks[i].addr;
+ for (r = 0; r < ranges; r++) {
+ /* Avoid wrap-around bugs. */
+ end = sc->sc_range[r].addr - 1 + sc->sc_range[r].size;
+ if (start > 0 && end == start - 1) {
+ sc->sc_range[r].size += size;
+ break;
+ }
+ /* Avoid wrap-around bugs. */
+ end = start - 1 + size;
+ if (sc->sc_range[r].addr > 0 &&
+ end == sc->sc_range[r].addr - 1) {
+ sc->sc_range[r].addr = start;
+ sc->sc_range[r].size += size;
+ break;
+ }
+ }
+ if (r == ranges) {
+ /* New range; add using insertion sort */
+ r = 0;
+ while (r < ranges && sc->sc_range[r].addr < start)
+ r++;
+ for (s = ranges; s > r; s--)
+ sc->sc_range[s] = sc->sc_range[s-1];
+ sc->sc_range[r].addr = start;
+ sc->sc_range[r].size = size;
+ ranges++;
+ }
+ }
- /*
- * Configure LAW for this LBC bank (CS) and map its physical
- * memory region into KVA.
- */
+ /*
+ * Ranges are sorted so quickly go over the list to merge ranges
+ * that grew toward each other while building the ranges.
+ */
+ r = 0;
+ while (r < ranges - 1) {
+ end = sc->sc_range[r].addr + sc->sc_range[r].size;
+ if (end != sc->sc_range[r+1].addr) {
+ r++;
+ continue;
+ }
+ sc->sc_range[r].size += sc->sc_range[r+1].size;
+ for (s = r + 1; s < ranges - 1; s++)
+ sc->sc_range[s] = sc->sc_range[s+1];
+ bzero(&sc->sc_range[s], sizeof(sc->sc_range[s]));
+ ranges--;
+ }
+
+ /*
+ * Configure LAW for the LBC ranges and map the physical memory
+ * range into KVA.
+ */
+ for (r = 0; r < ranges; r++) {
+ start = sc->sc_range[r].addr;
+ size = sc->sc_range[r].size;
error = law_enable(OCP85XX_TGTIF_LBC, start, size);
if (error)
return (error);
+ sc->sc_range[r].kva = (vm_offset_t)pmap_mapdev(start, size);
+ }
- sc->sc_banks[i].va = (vm_offset_t)pmap_mapdev(start, size);
- if (sc->sc_banks[i].va == 0) {
- lbc_banks_unmap(sc);
- return (ENOSPC);
+ /* XXX: need something better here? */
+ if (ranges == 0)
+ return (EINVAL);
+
+ /* Assign KVA to banks based on the enclosing range. */
+ for (i = 0; i < LBC_DEV_MAX; i++) {
+ size = sc->sc_banks[i].size;
+ if (size == 0)
+ continue;
+
+ start = sc->sc_banks[i].addr;
+ for (r = 0; r < ranges; r++) {
+ end = sc->sc_range[r].addr - 1 + sc->sc_range[r].size;
+ if (start >= sc->sc_range[r].addr &&
+ start - 1 + size <= end)
+ break;
+ }
+ if (r < ranges) {
+ sc->sc_banks[i].kva = sc->sc_range[r].kva +
+ (start - sc->sc_range[r].addr);
}
}
+
return (0);
}
@@ -215,12 +275,11 @@
size = sc->sc_banks[i].size;
if (size == 0)
continue;
+
/*
* Compute and program BR value.
*/
- regval = 0;
- regval |= sc->sc_banks[i].pa;
-
+ regval = sc->sc_banks[i].addr;
switch (sc->sc_banks[i].width) {
case 8:
regval |= (1 << 11);
@@ -240,24 +299,22 @@
regval |= (sc->sc_banks[i].msel << 5);
regval |= (sc->sc_banks[i].atom << 2);
regval |= 1;
-
- lbc_write_reg(sc, LBC85XX_BR(i), regval);
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh,
+ LBC85XX_BR(i), regval);
/*
* Compute and program OR value.
*/
- regval = 0;
- regval |= lbc_address_mask(size);
-
+ regval = lbc_address_mask(size);
switch (sc->sc_banks[i].msel) {
case LBCRES_MSEL_GPCM:
/* TODO Add flag support for option registers */
- regval |= 0x00000ff7;
+ regval |= 0x0ff7;
break;
case LBCRES_MSEL_FCM:
- printf("FCM mode not supported yet!");
- error = ENOSYS;
- goto fail;
+ /* TODO Add flag support for options register */
+ regval |= 0x0796;
+ break;
case LBCRES_MSEL_UPMA:
case LBCRES_MSEL_UPMB:
case LBCRES_MSEL_UPMC:
@@ -265,27 +322,10 @@
error = ENOSYS;
goto fail;
}
- lbc_write_reg(sc, LBC85XX_OR(i), regval);
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh,
+ LBC85XX_OR(i), regval);
}
- /*
- * Initialize configuration register:
- * - enable Local Bus
- * - set data buffer control signal function
- * - disable parity byte select
- * - set ECC parity type
- * - set bus monitor timing and timer prescale
- */
- lbc_write_reg(sc, LBC85XX_LBCR, 0);
-
- /*
- * Initialize clock ratio register:
- * - disable PLL bypass mode
- * - configure LCLK delay cycles for the assertion of LALE
- * - set system clock divider
- */
- lbc_write_reg(sc, LBC85XX_LCRR, 0x00030008);
-
return (0);
fail:
@@ -348,7 +388,7 @@
reg += addr_cells - 1 + size_cells;
/* Calculate address range relative to VA base. */
- start = sc->sc_banks[bank].va + start;
+ start = sc->sc_banks[bank].kva + start;
end = start + count - 1;
debugf("reg addr bank = %d, start = %lx, end = %lx, "
@@ -364,6 +404,18 @@
return (rv);
}
+/*
+ * LBC transfer-error interrupt handler: latch the error status into
+ * the softc's software copy (sc_ltesr), acknowledge the hardware by
+ * writing the bits back (LTESR is write-1-to-clear), and wake any
+ * thread sleeping on the bus device — presumably one waiting for a
+ * local-bus transaction to finish (confirm against the waiter, which
+ * is not visible in this chunk).
+ */
+static void
+lbc_intr(void *arg)
+{
+	struct lbc_softc *sc = arg;
+	uint32_t ltesr;
+
+	ltesr = bus_space_read_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR);
+	sc->sc_ltesr = ltesr;
+	bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR, ltesr);
+	wakeup(sc->sc_dev);
+}
+
static int
lbc_probe(device_t dev)
{
@@ -393,14 +445,59 @@
sc = device_get_softc(dev);
sc->sc_dev = dev;
- sc->sc_rid = 0;
- sc->sc_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_rid,
+ sc->sc_mrid = 0;
+ sc->sc_mres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_mrid,
RF_ACTIVE);
- if (sc->sc_res == NULL)
+ if (sc->sc_mres == NULL)
return (ENXIO);
- sc->sc_bst = rman_get_bustag(sc->sc_res);
- sc->sc_bsh = rman_get_bushandle(sc->sc_res);
+ sc->sc_bst = rman_get_bustag(sc->sc_mres);
+ sc->sc_bsh = rman_get_bushandle(sc->sc_mres);
+
+ for (bank = 0; bank < LBC_DEV_MAX; bank++) {
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_BR(bank), 0);
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_OR(bank), 0);
+ }
+
+ /*
+ * Initialize configuration register:
+ * - enable Local Bus
+ * - set data buffer control signal function
+ * - disable parity byte select
+ * - set ECC parity type
+ * - set bus monitor timing and timer prescale
+ */
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LBCR, 0);
+
+ /*
+ * Initialize clock ratio register:
+ * - disable PLL bypass mode
+ * - configure LCLK delay cycles for the assertion of LALE
+ * - set system clock divider
+ */
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LCRR, 0x00030008);
+
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTEDR, 0);
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR, ~0);
+ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTEIR, 0x64080001);
+
+ sc->sc_irid = 0;
+ sc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irid,
+ RF_ACTIVE | RF_SHAREABLE);
+ if (sc->sc_ires != NULL) {
+ error = bus_setup_intr(dev, sc->sc_ires,
+ INTR_TYPE_MISC | INTR_MPSAFE, NULL, lbc_intr, sc,
+ &sc->sc_icookie);
+ if (error) {
+ device_printf(dev, "could not activate interrupt\n");
+ bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid,
+ sc->sc_ires);
+ sc->sc_ires = NULL;
+ }
+ }
+
+ sc->sc_ltesr = ~0;
+
rangesptr = NULL;
rm = &sc->sc_rman;
@@ -479,7 +576,7 @@
debugf("bank = %d, start = %lx, size = %lx\n", bank,
start, size);
- sc->sc_banks[bank].pa = start + offset;
+ sc->sc_banks[bank].addr = start + offset;
sc->sc_banks[bank].size = size;
/*
@@ -552,7 +649,7 @@
fail:
free(rangesptr, M_OFWPROP);
- bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_rid, sc->sc_res);
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mrid, sc->sc_mres);
return (error);
}
@@ -676,3 +773,53 @@
di = device_get_ivars(child);
return (&di->di_ofw);
}
+
+/*
+ * Write an LBC register on behalf of a child device.
+ *
+ * LTESR is virtualized: lbc_intr() latches the error status into
+ * sc_ltesr and clears the hardware register, so while a latched value
+ * is pending (sc_ltesr != ~0) a child's write-1-to-clear of LTESR only
+ * clears bits in the software copy.  Clearing bit 0 of LTEATR
+ * (presumably the "valid" flag — confirm against the chip manual) ends
+ * error handling and invalidates the latched status.  Offsets outside
+ * the 4KB register window are rejected with a console warning.
+ */
+void
+lbc_write_reg(device_t child, u_int off, uint32_t val)
+{
+	device_t dev;
+	struct lbc_softc *sc;
+
+	dev = device_get_parent(child);
+
+	if (off >= 0x1000) {
+		device_printf(dev, "%s(%s): invalid offset %#x\n",
+		    __func__, device_get_nameunit(child), off);
+		return;
+	}
+
+	sc = device_get_softc(dev);
+
+	/* W1C semantics applied to the latched copy, not the hardware. */
+	if (off == LBC85XX_LTESR && sc->sc_ltesr != ~0u) {
+		sc->sc_ltesr ^= (val & sc->sc_ltesr);
+		return;
+	}
+
+	/* Ending error handling re-arms the latch (~0 = no value held). */
+	if (off == LBC85XX_LTEATR && (val & 1) == 0)
+		sc->sc_ltesr = ~0u;
+	bus_space_write_4(sc->sc_bst, sc->sc_bsh, off, val);
+}
+
+/*
+ * Read an LBC register on behalf of a child device.
+ *
+ * Counterpart of lbc_write_reg(): while an error status latched by
+ * lbc_intr() is pending (sc_ltesr != ~0), reads of LTESR return the
+ * software copy rather than the hardware register, which the interrupt
+ * handler has already cleared.  Offsets outside the 4KB register
+ * window return ~0 after a console warning.
+ */
+uint32_t
+lbc_read_reg(device_t child, u_int off)
+{
+	device_t dev;
+	struct lbc_softc *sc;
+	uint32_t val;
+
+	dev = device_get_parent(child);
+
+	if (off >= 0x1000) {
+		device_printf(dev, "%s(%s): invalid offset %#x\n",
+		    __func__, device_get_nameunit(child), off);
+		return (~0U);
+	}
+
+	sc = device_get_softc(dev);
+
+	if (off == LBC85XX_LTESR && sc->sc_ltesr != ~0U)
+		val = sc->sc_ltesr;
+	else
+		val = bus_space_read_4(sc->sc_bst, sc->sc_bsh, off);
+	return (val);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/lbc.h
--- a/head/sys/powerpc/mpc85xx/lbc.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/lbc.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/mpc85xx/lbc.h 238045 2012-07-03 00:06:14Z marcel $
*/
#ifndef _MACHINE_LBC_H_
@@ -33,10 +33,35 @@
#define LBC_DEV_MAX 8
/* Local access registers */
-#define LBC85XX_BR(n) (8 * n)
-#define LBC85XX_OR(n) (4 + (8 * n))
-#define LBC85XX_LBCR (0xd0)
-#define LBC85XX_LCRR (0xd4)
+#define LBC85XX_BR(n) (0x0 + (8 * n)) /* Base register 0-7 */
+#define LBC85XX_OR(n) (0x4 + (8 * n)) /* Options register 0-7 */
+#define LBC85XX_MAR 0x068 /* UPM address register */
+#define LBC85XX_MAMR 0x070 /* UPMA mode register */
+#define LBC85XX_MBMR 0x074 /* UPMB mode register */
+#define LBC85XX_MCMR 0x078 /* UPMC mode register */
+#define LBC85XX_MRTPR 0x084 /* Memory refresh timer prescaler */
+#define LBC85XX_MDR 0x088 /* UPM data register */
+#define LBC85XX_LSOR 0x090 /* Special operation initiation */
+#define LBC85XX_LURT 0x0a0 /* UPM refresh timer */
+#define LBC85XX_LSRT 0x0a4 /* SDRAM refresh timer */
+#define LBC85XX_LTESR 0x0b0 /* Transfer error status register */
+#define LBC85XX_LTEDR 0x0b4 /* Transfer error disable register */
+#define LBC85XX_LTEIR 0x0b8 /* Transfer error interrupt register */
+#define LBC85XX_LTEATR 0x0bc /* Transfer error attributes register */
+#define LBC85XX_LTEAR 0x0c0 /* Transfer error address register */
+#define LBC85XX_LTECCR 0x0c4 /* Transfer error ECC register */
+#define LBC85XX_LBCR 0x0d0 /* Configuration register */
+#define LBC85XX_LCRR 0x0d4 /* Clock ratio register */
+#define LBC85XX_FMR 0x0e0 /* Flash mode register */
+#define LBC85XX_FIR 0x0e4 /* Flash instruction register */
+#define LBC85XX_FCR 0x0e8 /* Flash command register */
+#define LBC85XX_FBAR 0x0ec /* Flash block address register */
+#define LBC85XX_FPAR 0x0f0 /* Flash page address register */
+#define LBC85XX_FBCR 0x0f4 /* Flash byte count register */
+#define LBC85XX_FECC0 0x100 /* Flash ECC block 0 register */
+#define LBC85XX_FECC1 0x104 /* Flash ECC block 0 register */
+#define LBC85XX_FECC2 0x108 /* Flash ECC block 0 register */
+#define LBC85XX_FECC3 0x10c /* Flash ECC block 0 register */
/* LBC machine select */
#define LBCRES_MSEL_GPCM 0
@@ -55,10 +80,16 @@
#define LBCRES_ATOM_RAWA 1
#define LBCRES_ATOM_WARA 2
+struct lbc_memrange {
+ vm_paddr_t addr;
+ vm_size_t size;
+ vm_offset_t kva;
+};
+
struct lbc_bank {
- u_long pa; /* physical addr of the bank */
- u_long size; /* bank size */
- vm_offset_t va; /* VA of the bank */
+ vm_paddr_t addr; /* physical addr of the bank */
+ vm_size_t size; /* bank size */
+ vm_offset_t kva; /* VA of the bank */
/*
* XXX the following bank attributes do not have properties specified
@@ -74,17 +105,25 @@
struct lbc_softc {
device_t sc_dev;
- struct resource *sc_res;
+
+ struct resource *sc_mres;
bus_space_handle_t sc_bsh;
bus_space_tag_t sc_bst;
- int sc_rid;
+ int sc_mrid;
+
+ int sc_irid;
+ struct resource *sc_ires;
+ void *sc_icookie;
struct rman sc_rman;
int sc_addr_cells;
int sc_size_cells;
+ struct lbc_memrange sc_range[LBC_DEV_MAX];
struct lbc_bank sc_banks[LBC_DEV_MAX];
+
+ uint32_t sc_ltesr;
};
struct lbc_devinfo {
@@ -93,4 +132,7 @@
int di_bank;
};
+uint32_t lbc_read_reg(device_t child, u_int off);
+void lbc_write_reg(device_t child, u_int off, uint32_t val);
+
#endif /* _MACHINE_LBC_H_ */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/mpc85xx.c
--- a/head/sys/powerpc/mpc85xx/mpc85xx.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/mpc85xx.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/mpc85xx.c 222428 2011-05-28 19:14:16Z marcel $");
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/mpc85xx.c 235934 2012-05-24 21:07:10Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -38,6 +38,7 @@
#include <machine/cpu.h>
#include <machine/cpufunc.h>
+#include <machine/pio.h>
#include <machine/spr.h>
#include <powerpc/mpc85xx/mpc85xx.h>
@@ -60,7 +61,7 @@
volatile uint32_t *ptr = (void *)addr;
*ptr = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
int
@@ -87,6 +88,9 @@
uint32_t bar, sr;
int i, law_max;
+ if (size == 0)
+ return (0);
+
law_max = law_getmax();
bar = _LAW_BAR(addr);
sr = _LAW_SR(trgt, size);
@@ -167,7 +171,10 @@
default:
rv = ENXIO;
}
- *trgt_mem = *trgt_io = trgt;
+ if (rv == 0) {
+ *trgt_mem = trgt;
+ *trgt_io = trgt;
+ }
return (rv);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/nexus.c
--- a/head/sys/powerpc/mpc85xx/nexus.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/nexus.c Wed Jul 25 16:40:53 2012 +0300
@@ -54,7 +54,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/nexus.c 227843 2011-11-22 21:28:20Z marius $");
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/nexus.c 238042 2012-07-02 23:41:56Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -65,6 +65,8 @@
#include <sys/malloc.h>
#include <sys/rman.h>
+#include <machine/intr_machdep.h>
+
/*
* Device interface
*/
@@ -75,6 +77,13 @@
static int nexus_deactivate_resource(device_t, device_t, int, int,
struct resource *);
+static int nexus_config_intr(device_t, int, enum intr_trigger,
+ enum intr_polarity);
+static int nexus_setup_intr(device_t, device_t, struct resource *, int,
+ driver_filter_t *, driver_intr_t *, void *, void **);
+static int nexus_teardown_intr(device_t, device_t, struct resource *,
+ void *);
+
static device_method_t nexus_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, nexus_probe),
@@ -89,8 +98,9 @@
DEVMETHOD(bus_probe_nomatch, NULL),
DEVMETHOD(bus_read_ivar, NULL),
DEVMETHOD(bus_write_ivar, NULL),
- DEVMETHOD(bus_setup_intr, NULL),
- DEVMETHOD(bus_teardown_intr, NULL),
+ DEVMETHOD(bus_config_intr, nexus_config_intr),
+ DEVMETHOD(bus_setup_intr, nexus_setup_intr),
+ DEVMETHOD(bus_teardown_intr, nexus_teardown_intr),
DEVMETHOD(bus_alloc_resource, NULL),
DEVMETHOD(bus_activate_resource, nexus_activate_resource),
DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource),
@@ -143,3 +153,49 @@
/* Not much to be done yet... */
return (rman_deactivate_resource(res));
}
+
+/*
+ * BUS_CONFIG_INTR method: forward trigger/polarity configuration for
+ * an interrupt line straight to the powerpc interrupt layer.
+ */
+static int
+nexus_config_intr(device_t bus, int irq, enum intr_trigger trig,
+    enum intr_polarity pol)
+{
+
+	return (powerpc_config_intr(irq, trig, pol));
+}
+
+/*
+ * BUS_SETUP_INTR method: activate the IRQ resource and register the
+ * child's filter/handler with the powerpc interrupt layer.  Resources
+ * allocated without RF_SHAREABLE are installed exclusively
+ * (INTR_EXCL).  On success *cookiep receives the teardown cookie;
+ * on any failure it is left NULL.
+ */
+static int
+nexus_setup_intr(device_t bus, device_t child, struct resource *res, int flags,
+    driver_filter_t *ifilt, driver_intr_t *ihand, void *arg, void **cookiep)
+{
+	int error;
+
+	*cookiep = NULL;
+
+	/* somebody tried to setup an irq that failed to allocate! */
+	if (res == NULL)
+		return (EINVAL);
+
+	if ((rman_get_flags(res) & RF_SHAREABLE) == 0)
+		flags |= INTR_EXCL;
+
+	/* We depend on rman_activate_resource() being idempotent. */
+	error = rman_activate_resource(res);
+	if (error)
+		return (error);
+
+	error = powerpc_setup_intr(device_get_nameunit(child),
+	    rman_get_start(res), ifilt, ihand, arg, flags, cookiep);
+	return (error);
+}
+
+/*
+ * BUS_TEARDOWN_INTR method: unregister a handler previously installed
+ * by nexus_setup_intr(), identified by its cookie.  The resource is
+ * only sanity-checked; deactivation is left to the caller.
+ */
+static int
+nexus_teardown_intr(device_t bus, device_t child, struct resource *res,
+    void *cookie)
+{
+	int error;
+
+	if (res == NULL)
+		return (EINVAL);
+
+	error = powerpc_teardown_intr(cookie);
+	return (error);
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/openpic_fdt.c
--- a/head/sys/powerpc/mpc85xx/openpic_fdt.c Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-/*-
- * Copyright (c) 2009-2010 The FreeBSD Foundation
- * All rights reserved.
- *
- * This software was developed by Semihalf under sponsorship from
- * the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/openpic_fdt.c 222813 2011-06-07 08:46:13Z attilio $");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/bus.h>
-
-#include <machine/bus.h>
-#include <machine/intr_machdep.h>
-
-#include <dev/ofw/ofw_bus.h>
-#include <dev/ofw/ofw_bus_subr.h>
-
-#include <machine/openpicvar.h>
-
-#include "pic_if.h"
-
-static int openpic_fdt_probe(device_t);
-static int openpic_fdt_attach(device_t);
-
-static device_method_t openpic_fdt_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, openpic_fdt_probe),
- DEVMETHOD(device_attach, openpic_fdt_attach),
-
- /* PIC interface */
- DEVMETHOD(pic_bind, openpic_bind),
- DEVMETHOD(pic_config, openpic_config),
- DEVMETHOD(pic_dispatch, openpic_dispatch),
- DEVMETHOD(pic_enable, openpic_enable),
- DEVMETHOD(pic_eoi, openpic_eoi),
- DEVMETHOD(pic_ipi, openpic_ipi),
- DEVMETHOD(pic_mask, openpic_mask),
- DEVMETHOD(pic_unmask, openpic_unmask),
-
- { 0, 0 },
-};
-
-static driver_t openpic_fdt_driver = {
- "openpic",
- openpic_fdt_methods,
- sizeof(struct openpic_softc)
-};
-
-DRIVER_MODULE(openpic, simplebus, openpic_fdt_driver, openpic_devclass, 0, 0);
-
-static int
-openpic_fdt_probe(device_t dev)
-{
-
- if (!ofw_bus_is_compatible(dev, "chrp,open-pic"))
- return (ENXIO);
-
- device_set_desc(dev, OPENPIC_DEVSTR);
- return (BUS_PROBE_DEFAULT);
-}
-
-static int
-openpic_fdt_attach(device_t dev)
-{
-
- return (openpic_common_attach(dev, ofw_bus_get_node(dev)));
-}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/mpc85xx/pci_fdt.c
--- a/head/sys/powerpc/mpc85xx/pci_fdt.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/mpc85xx/pci_fdt.c Wed Jul 25 16:40:53 2012 +0300
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/pci_fdt.c 227843 2011-11-22 21:28:20Z marius $");
+__FBSDID("$FreeBSD: head/sys/powerpc/mpc85xx/pci_fdt.c 235933 2012-05-24 21:01:35Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -816,8 +816,13 @@
}
*allocp = pci_start + alloc;
- *vap = (uintptr_t)pmap_mapdev(start, size);
- fsl_pcib_outbound(sc, wnd, type, start, size, pci_start);
+ if (size > 0) {
+ *vap = (uintptr_t)pmap_mapdev(start, size);
+ fsl_pcib_outbound(sc, wnd, type, start, size, pci_start);
+ } else {
+ *vap = 0;
+ fsl_pcib_outbound(sc, wnd, -1, 0, 0, 0);
+ }
return (0);
}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powermac/hrowpic.c
--- a/head/sys/powerpc/powermac/hrowpic.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powermac/hrowpic.c Wed Jul 25 16:40:53 2012 +0300
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/powerpc/powermac/hrowpic.c 237936 2012-07-01 19:07:45Z rpaulo $
*/
/*
@@ -245,7 +245,7 @@
}
static void
-hrowpic_eoi(device_t dev __unused, u_int irq __unused)
+hrowpic_eoi(device_t dev, u_int irq)
{
struct hrowpic_softc *sc;
int bank;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/atomic.S
--- a/head/sys/powerpc/powerpc/atomic.S Wed Jul 25 16:32:50 2012 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-/*-
- * Copyright (c) 2000, 2001 Benno Rice
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: head/sys/powerpc/powerpc/atomic.S 230400 2012-01-20 22:34:19Z andreast $
- */
-
-#include <machine/asm.h>
-
- .text
-
-ASENTRY_NOPROF(atomic_set_8)
-0: lwarx 0, 0, 3 /* load old value */
- slwi 4, 4, 24 /* shift the byte so it's in the right place */
- or 0, 0, 4 /* generate new value */
- stwcx. 0, 0, 3 /* attempt to store */
- bne- 0 /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_clear_8)
-0: lwarx 0, 0, 3 /* load old value */
- slwi 4, 4, 24 /* shift the byte so it's in the right place */
- andc 0, 0, 4 /* generate new value */
- stwcx. 0, 0, 3 /* attempt to store */
- bne- 0 /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_add_8)
-0: lwarx 9, 0, 3 /* load old value */
- srwi 0, 9, 24 /* byte alignment */
- add 0, 4, 0 /* calculate new value */
- slwi 0, 9, 24 /* byte alignment */
- clrlwi 9, 9, 8 /* clear the byte in the original word */
- or 9, 9, 0 /* copy back in to the original word */
- stwcx. 9, 0, 3 /* attempt to store */
- bne- 0 /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_subtract_8)
-0: lwarx 9, 0, 3 /* load old value */
- srwi 0, 9, 24 /* byte alignment */
- subf 0, 4, 0 /* calculate new value */
- slwi 0, 9, 24 /* byte alignment */
- clrlwi 9, 9, 8 /* clear the byte in the original word */
- or 9, 9, 0 /* copy back in to the original word */
- stwcx. 9, 0, 3 /* attempt to store */
- bne- 0 /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_set_16)
- li 11, 3 /* mask to test for alignment */
- andc. 11, 3, 11 /* force address to be word-aligned */
-0: lwarx 12, 0, 11 /* load old value */
- bne 1f /* no realignment needed if it's aligned */
- slwi 4, 4, 16 /* realign operand */
-1: or 12, 12, 4 /* calculate new value */
- stwcx. 12, 0, 11 /* attempt to store */
- bne- 0b /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_clear_16)
- li 11, 3 /* mask to test for alignment */
- andc. 11, 3, 11 /* force address to be word-aligned */
-0: lwarx 12, 0, 11 /* load old value */
- bne 1f /* no realignment needed if it's aligned */
- slwi 4, 4, 16 /* realign operand */
-1: andc 12, 12, 4 /* calculate new value */
- stwcx. 12, 0, 11 /* attempt to store */
- bne- 0b /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_add_16)
- li 11, 3 /* mask to test for alignment */
- andc. 11, 3, 11 /* force address to be word-aligned */
-0: lwarx 12, 0, 11 /* load old value */
- bne 1f /* no realignment needed if it's aligned */
- srwi 12, 9, 16 /* realign */
-1: add 12, 4, 12 /* calculate new value */
- bne 2f /* no realignment needed if it's aligned */
- slwi 12, 12, 16 /* realign */
-2: clrlwi 9, 9, 16 /* clear old value */
- or 9, 9, 12 /* copy in new value */
- stwcx. 12, 0, 11 /* attempt to store */
- bne- 0b /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
-
-ASENTRY_NOPROF(atomic_subtract_16)
- li 11, 3 /* mask to test for alignment */
- andc. 11, 3, 11 /* force address to be word-aligned */
-0: lwarx 12, 0, 11 /* load old value */
- bne 1f /* no realignment needed if it's aligned */
- srwi 12, 9, 16 /* realign */
-1: subf 12, 4, 12 /* calculate new value */
- bne 2f /* no realignment needed if it's aligned */
- slwi 12, 12, 16 /* realign */
-2: clrlwi 9, 9, 16 /* clear old value */
- or 9, 9, 12 /* copy in new value */
- stwcx. 12, 0, 11 /* attempt to store */
- bne- 0 /* loop if failed */
- eieio /* synchronise */
- sync
- blr /* return */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/bus_machdep.c
--- a/head/sys/powerpc/powerpc/bus_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/bus_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/bus_machdep.c 226410 2011-10-15 23:15:55Z nwhitehorn $");
+__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/bus_machdep.c 234579 2012-04-22 18:54:51Z nwhitehorn $");
#define KTR_BE_IO 0
#define KTR_LE_IO 0
@@ -169,7 +169,8 @@
bs_gen_barrier(bus_space_handle_t bsh __unused, bus_size_t ofs __unused,
bus_size_t size __unused, int flags __unused)
{
- __asm __volatile("eieio; sync" : : : "memory");
+
+ powerpc_iomb();
}
/*
@@ -183,6 +184,7 @@
addr = __ppc_ba(bsh, ofs);
res = *addr;
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -195,6 +197,7 @@
addr = __ppc_ba(bsh, ofs);
res = *addr;
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -207,6 +210,7 @@
addr = __ppc_ba(bsh, ofs);
res = *addr;
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -219,6 +223,7 @@
addr = __ppc_ba(bsh, ofs);
res = *addr;
+ powerpc_iomb();
return (res);
}
@@ -253,7 +258,7 @@
while (cnt--)
*addr++ = *s++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -263,7 +268,7 @@
while (cnt--)
*addr++ = *s++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -273,7 +278,7 @@
while (cnt--)
*addr++ = *s++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -283,7 +288,7 @@
while (cnt--)
*addr++ = *s++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -293,7 +298,7 @@
addr = __ppc_ba(bsh, ofs);
*addr = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -304,7 +309,7 @@
addr = __ppc_ba(bsh, ofs);
*addr = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -315,7 +320,7 @@
addr = __ppc_ba(bsh, ofs);
*addr = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_BE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -326,7 +331,7 @@
addr = __ppc_ba(bsh, ofs);
*addr = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -365,7 +370,7 @@
while (cnt--)
*d++ = *addr++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -376,7 +381,7 @@
while (cnt--)
*d++ = *addr++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -387,7 +392,7 @@
while (cnt--)
*d++ = *addr++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -398,7 +403,7 @@
while (cnt--)
*d++ = *addr++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -408,7 +413,7 @@
while (cnt--)
*d = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -418,7 +423,7 @@
while (cnt--)
*d = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -428,7 +433,7 @@
while (cnt--)
*d = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -438,7 +443,7 @@
while (cnt--)
*d = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -448,7 +453,7 @@
while (cnt--)
*d++ = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -458,7 +463,7 @@
while (cnt--)
*d++ = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -468,7 +473,7 @@
while (cnt--)
*d++ = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -478,7 +483,7 @@
while (cnt--)
*d++ = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
/*
@@ -492,7 +497,7 @@
addr = __ppc_ba(bsh, ofs);
res = *addr;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -505,7 +510,7 @@
addr = __ppc_ba(bsh, ofs);
__asm __volatile("lhbrx %0, 0, %1" : "=r"(res) : "r"(addr));
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -518,7 +523,7 @@
addr = __ppc_ba(bsh, ofs);
__asm __volatile("lwbrx %0, 0, %1" : "=r"(res) : "r"(addr));
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x) = %#x", __func__, bsh, ofs, res);
return (res);
}
@@ -560,7 +565,7 @@
while (cnt--)
*addr++ = *s++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -570,7 +575,7 @@
while (cnt--)
*addr++ = in16rb(s++);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -580,7 +585,7 @@
while (cnt--)
*addr++ = in32rb(s++);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -596,6 +601,7 @@
addr = __ppc_ba(bsh, ofs);
*addr = val;
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -606,6 +612,7 @@
addr = __ppc_ba(bsh, ofs);
__asm __volatile("sthbrx %0, 0, %1" :: "r"(val), "r"(addr));
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -616,6 +623,7 @@
addr = __ppc_ba(bsh, ofs);
__asm __volatile("stwbrx %0, 0, %1" :: "r"(val), "r"(addr));
+ powerpc_iomb();
CTR4(KTR_LE_IO, "%s(bsh=%#x, ofs=%#x, val=%#x)", __func__, bsh, ofs, val);
}
@@ -661,7 +669,7 @@
while (cnt--)
*d++ = *addr++;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -672,7 +680,7 @@
while (cnt--)
out16rb(d++, *addr++);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -683,7 +691,7 @@
while (cnt--)
out32rb(d++, *addr++);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -700,7 +708,7 @@
while (cnt--)
*d = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -710,7 +718,7 @@
while (cnt--)
out16rb(d, val);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -720,7 +728,7 @@
while (cnt--)
out32rb(d, val);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -736,7 +744,7 @@
while (cnt--)
*d++ = val;
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -746,7 +754,7 @@
while (cnt--)
out16rb(d++, val);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
@@ -756,7 +764,7 @@
while (cnt--)
out32rb(d++, val);
- __asm __volatile("eieio; sync");
+ powerpc_iomb();
}
static void
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/cpu.c
--- a/head/sys/powerpc/powerpc/cpu.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/cpu.c Wed Jul 25 16:40:53 2012 +0300
@@ -55,7 +55,7 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from $NetBSD: cpu_subr.c,v 1.1 2003/02/03 17:10:09 matt Exp $
- * $FreeBSD: head/sys/powerpc/powerpc/cpu.c 225953 2011-10-03 21:19:15Z mav $
+ * $FreeBSD: head/sys/powerpc/powerpc/cpu.c 236141 2012-05-27 10:25:20Z raj $
*/
#include <sys/param.h>
@@ -75,13 +75,13 @@
#include <machine/spr.h>
static void cpu_6xx_setup(int cpuid, uint16_t vers);
-static void cpu_e500_setup(int cpuid, uint16_t vers);
static void cpu_970_setup(int cpuid, uint16_t vers);
+static void cpu_booke_setup(int cpuid, uint16_t vers);
int powerpc_pow_enabled;
void (*cpu_idle_hook)(void) = NULL;
static void cpu_idle_60x(void);
-static void cpu_idle_e500(void);
+static void cpu_idle_booke(void);
struct cputab {
const char *name;
@@ -146,9 +146,13 @@
{ "Motorola PowerPC 8245", MPC8245, REVFMT_MAJMIN,
PPC_FEATURE_HAS_FPU, cpu_6xx_setup },
{ "Freescale e500v1 core", FSL_E500v1, REVFMT_MAJMIN,
- 0, cpu_e500_setup },
+ 0, cpu_booke_setup },
{ "Freescale e500v2 core", FSL_E500v2, REVFMT_MAJMIN,
- 0, cpu_e500_setup },
+ 0, cpu_booke_setup },
+ { "Freescale e500mc core", FSL_E500mc, REVFMT_MAJMIN,
+ 0, cpu_booke_setup },
+ { "Freescale e5500 core", FSL_E5500, REVFMT_MAJMIN,
+ 0, cpu_booke_setup },
{ "IBM Cell Broadband Engine", IBMCELLBE, REVFMT_MAJMIN,
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU,
NULL},
@@ -191,6 +195,8 @@
break;
case FSL_E500v1:
case FSL_E500v2:
+ case FSL_E500mc:
+ case FSL_E5500:
maj = (pvr >> 4) & 0xf;
min = (pvr >> 0) & 0xf;
break;
@@ -438,8 +444,9 @@
}
static void
-cpu_e500_setup(int cpuid, uint16_t vers)
+cpu_booke_setup(int cpuid, uint16_t vers)
{
+#ifdef BOOKE_E500
register_t hid0;
hid0 = mfspr(SPR_HID0);
@@ -451,9 +458,10 @@
mtspr(SPR_HID0, hid0);
printf("cpu%d: HID0 %b\n", cpuid, (int)hid0, HID0_E500_BITMASK);
+#endif
if (cpu_idle_hook == NULL)
- cpu_idle_hook = cpu_idle_e500;
+ cpu_idle_hook = cpu_idle_booke;
}
static void
@@ -519,6 +527,7 @@
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
+
if (cpu_idle_hook != NULL) {
if (!busy) {
critical_enter();
@@ -530,6 +539,7 @@
critical_exit();
}
}
+
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
busy, curcpu);
}
@@ -576,7 +586,7 @@
}
static void
-cpu_idle_e500(void)
+cpu_idle_booke(void)
{
register_t msr;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/db_trace.c
--- a/head/sys/powerpc/powerpc/db_trace.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/db_trace.c Wed Jul 25 16:40:53 2012 +0300
@@ -1,4 +1,4 @@
-/* $FreeBSD: head/sys/powerpc/powerpc/db_trace.c 223470 2011-06-23 09:43:53Z andreast $ */
+/* $FreeBSD: head/sys/powerpc/powerpc/db_trace.c 236141 2012-05-27 10:25:20Z raj $ */
/* $NetBSD: db_trace.c,v 1.20 2002/05/13 20:30:09 matt Exp $ */
/* $OpenBSD: db_trace.c,v 1.3 1997/03/21 02:10:48 niklas Exp $ */
@@ -102,7 +102,7 @@
{ "dar", DB_OFFSET(cpu.aim.dar), db_frame },
{ "dsisr", DB_OFFSET(cpu.aim.dsisr), db_frame },
#endif
-#ifdef E500
+#if defined(BOOKE)
{ "dear", DB_OFFSET(cpu.booke.dear), db_frame },
{ "esr", DB_OFFSET(cpu.booke.esr), db_frame },
#endif
@@ -243,7 +243,7 @@
case EXC_SC: trapstr = "SC"; break;
case EXC_EXI: trapstr = "EXI"; break;
case EXC_MCHK: trapstr = "MCHK"; break;
-#ifndef E500
+#if !defined(BOOKE)
case EXC_VEC: trapstr = "VEC"; break;
case EXC_FPA: trapstr = "FPA"; break;
case EXC_BPT: trapstr = "BPT"; break;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/gdb_machdep.c
--- a/head/sys/powerpc/powerpc/gdb_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/gdb_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/gdb_machdep.c 236141 2012-05-27 10:25:20Z raj $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -84,7 +84,7 @@
int
gdb_cpu_signal(int vector, int dummy __unused)
{
-#ifdef E500
+#if defined(BOOKE)
if (vector == EXC_DEBUG || vector == EXC_PGM)
return (SIGTRAP);
#else
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/genassym.c
--- a/head/sys/powerpc/powerpc/genassym.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/genassym.c Wed Jul 25 16:40:53 2012 +0300
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* from: @(#)genassym.c 5.11 (Berkeley) 5/10/91
- * $FreeBSD: head/sys/powerpc/powerpc/genassym.c 230123 2012-01-15 00:08:14Z nwhitehorn $
+ * $FreeBSD: head/sys/powerpc/powerpc/genassym.c 236141 2012-05-27 10:25:20Z raj $
*/
#include <sys/param.h>
@@ -63,7 +63,7 @@
ASSYM(PC_DISISAVE, offsetof(struct pcpu, pc_disisave));
ASSYM(PC_DBSAVE, offsetof(struct pcpu, pc_dbsave));
-#ifdef E500
+#if defined(BOOKE)
ASSYM(PC_BOOKE_CRITSAVE, offsetof(struct pcpu, pc_booke_critsave));
ASSYM(PC_BOOKE_MCHKSAVE, offsetof(struct pcpu, pc_booke_mchksave));
ASSYM(PC_BOOKE_TLBSAVE, offsetof(struct pcpu, pc_booke_tlbsave));
@@ -116,15 +116,14 @@
ASSYM(PM_SR, offsetof(struct pmap, pm_sr));
ASSYM(USER_SR, USER_SR);
#endif
-#elif defined(E500)
+#elif defined(BOOKE)
ASSYM(PM_PDIR, offsetof(struct pmap, pm_pdir));
-#endif
-
-#if defined(E500)
ASSYM(PTE_RPN, offsetof(struct pte, rpn));
ASSYM(PTE_FLAGS, offsetof(struct pte, flags));
+#if defined(BOOKE_E500)
ASSYM(TLB0_ENTRY_SIZE, sizeof(struct tlb_entry));
#endif
+#endif
#ifdef __powerpc64__
ASSYM(FSP, 48);
@@ -215,18 +214,20 @@
ASSYM(KERNBASE, KERNBASE);
ASSYM(MAXCOMLEN, MAXCOMLEN);
-#ifdef E500
+#if defined(BOOKE)
+ASSYM(PSL_DE, PSL_DE);
+ASSYM(PSL_DS, PSL_DS);
+ASSYM(PSL_IS, PSL_IS);
+ASSYM(PSL_CE, PSL_CE);
+#endif
+#if defined(BOOKE_E500)
ASSYM(PSL_UCLE, PSL_UCLE);
ASSYM(PSL_SPE, PSL_SPE);
ASSYM(PSL_WE, PSL_WE);
-ASSYM(PSL_CE, PSL_CE);
ASSYM(PSL_UBLE, PSL_UBLE);
-ASSYM(PSL_DS, PSL_DS);
-ASSYM(PSL_IS, PSL_IS);
-ASSYM(PSL_DE, PSL_DE);
ASSYM(PSL_KERNSET_INIT, PSL_KERNSET_INIT);
-#else /* AIM */
+#elif defined(AIM)
#ifdef __powerpc64__
ASSYM(PSL_SF, PSL_SF);
ASSYM(PSL_HV, PSL_HV);
@@ -256,8 +257,9 @@
ASSYM(PSL_FP, PSL_FP);
ASSYM(PSL_ME, PSL_ME);
ASSYM(PSL_PR, PSL_PR);
+#if defined(BOOKE_E500)
ASSYM(PSL_PMM, PSL_PMM);
-
+#endif
ASSYM(PSL_KERNSET, PSL_KERNSET);
ASSYM(PSL_USERSET, PSL_USERSET);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/mmu_if.m
--- a/head/sys/powerpc/powerpc/mmu_if.m Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/mmu_if.m Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
-# $FreeBSD: head/sys/powerpc/powerpc/mmu_if.m 225418 2011-09-06 10:30:11Z kib $
+# $FreeBSD: head/sys/powerpc/powerpc/mmu_if.m 238357 2012-07-10 22:10:21Z alc $
#
#include <sys/param.h>
@@ -387,7 +387,7 @@
*
* @retval int count of referenced bits
*/
-METHOD boolean_t ts_referenced {
+METHOD int ts_referenced {
mmu_t _mmu;
vm_page_t _pg;
};
@@ -761,7 +761,7 @@
*/
METHOD void * mapdev {
mmu_t _mmu;
- vm_offset_t _pa;
+ vm_paddr_t _pa;
vm_size_t _size;
};
@@ -818,7 +818,7 @@
*
* @retval pa physical address corresponding to mapping
*/
-METHOD vm_offset_t kextract {
+METHOD vm_paddr_t kextract {
mmu_t _mmu;
vm_offset_t _va;
};
@@ -833,7 +833,7 @@
METHOD void kenter {
mmu_t _mmu;
vm_offset_t _va;
- vm_offset_t _pa;
+ vm_paddr_t _pa;
};
/**
@@ -860,7 +860,7 @@
*/
METHOD boolean_t dev_direct_mapped {
mmu_t _mmu;
- vm_offset_t _pa;
+ vm_paddr_t _pa;
vm_size_t _size;
};
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/openpic_fdt.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/powerpc/powerpc/openpic_fdt.c Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/openpic_fdt.c 236119 2012-05-26 21:02:49Z raj $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <machine/openpicvar.h>
+
+#include "pic_if.h"
+
+static int openpic_fdt_probe(device_t);
+static int openpic_fdt_attach(device_t);
+
+static device_method_t openpic_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, openpic_fdt_probe),
+ DEVMETHOD(device_attach, openpic_fdt_attach),
+
+ /* PIC interface */
+ DEVMETHOD(pic_bind, openpic_bind),
+ DEVMETHOD(pic_config, openpic_config),
+ DEVMETHOD(pic_dispatch, openpic_dispatch),
+ DEVMETHOD(pic_enable, openpic_enable),
+ DEVMETHOD(pic_eoi, openpic_eoi),
+ DEVMETHOD(pic_ipi, openpic_ipi),
+ DEVMETHOD(pic_mask, openpic_mask),
+ DEVMETHOD(pic_unmask, openpic_unmask),
+
+ { 0, 0 },
+};
+
+static driver_t openpic_fdt_driver = {
+ "openpic",
+ openpic_fdt_methods,
+ sizeof(struct openpic_softc)
+};
+
+DRIVER_MODULE(openpic, simplebus, openpic_fdt_driver, openpic_devclass, 0, 0);
+
+static int
+openpic_fdt_probe(device_t dev)
+{
+
+ if (!ofw_bus_is_compatible(dev, "chrp,open-pic"))
+ return (ENXIO);
+
+ device_set_desc(dev, OPENPIC_DEVSTR);
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+openpic_fdt_attach(device_t dev)
+{
+
+ return (openpic_common_attach(dev, ofw_bus_get_node(dev)));
+}
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/platform.c
--- a/head/sys/powerpc/powerpc/platform.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/platform.c Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/platform.c 227537 2011-11-15 20:11:03Z marius $");
+__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/platform.c 235936 2012-05-24 21:13:24Z raj $");
/*
* Dispatch platform calls to the appropriate platform implementation
@@ -92,7 +92,7 @@
&aregions, &naregions);
for (i = 0; i < npregions; i++)
- if ((addr >= pregions[i].mr_start)
+ if ((addr >= pregions[i].mr_start)
&& (addr + len <= pregions[i].mr_start + pregions[i].mr_size))
return (0);
@@ -116,7 +116,7 @@
{
return (PLATFORM_TIMEBASE_FREQ(plat_obj, cpu));
}
-
+
int
platform_smp_first_cpu(struct cpuref *cpu)
{
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/powerpc/powerpc/pmap_dispatch.c
--- a/head/sys/powerpc/powerpc/pmap_dispatch.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/powerpc/powerpc/pmap_dispatch.c Wed Jul 25 16:40:53 2012 +0300
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/pmap_dispatch.c 227537 2011-11-15 20:11:03Z marius $");
+__FBSDID("$FreeBSD: head/sys/powerpc/powerpc/pmap_dispatch.c 235936 2012-05-24 21:13:24Z raj $");
/*
* Dispatch MI pmap calls to the appropriate MMU implementation
@@ -76,6 +76,20 @@
int pmap_bootstrapped;
+#ifdef AIM
+int
+pvo_vaddr_compare(struct pvo_entry *a, struct pvo_entry *b)
+{
+ if (PVO_VADDR(a) < PVO_VADDR(b))
+ return (-1);
+ else if (PVO_VADDR(a) > PVO_VADDR(b))
+ return (1);
+ return (0);
+}
+RB_GENERATE(pvo_tree, pvo_entry, pvo_plink, pvo_vaddr_compare);
+#endif
+
+
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
@@ -426,7 +440,7 @@
}
void *
-pmap_mapdev(vm_offset_t pa, vm_size_t size)
+pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{
CTR3(KTR_PMAP, "%s(%#x, %#x)", __func__, pa, size);
@@ -457,7 +471,7 @@
MMU_UNMAPDEV(mmu_obj, va, size);
}
-vm_offset_t
+vm_paddr_t
pmap_kextract(vm_offset_t va)
{
@@ -466,7 +480,7 @@
}
void
-pmap_kenter(vm_offset_t va, vm_offset_t pa)
+pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
CTR3(KTR_PMAP, "%s(%#x, %#x)", __func__, va, pa);
@@ -482,7 +496,7 @@
}
boolean_t
-pmap_dev_direct_mapped(vm_offset_t pa, vm_size_t size)
+pmap_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
{
CTR3(KTR_PMAP, "%s(%#x, %#x)", __func__, pa, size);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/conf/GENERIC
--- a/head/sys/sparc64/conf/GENERIC Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/conf/GENERIC Wed Jul 25 16:40:53 2012 +0300
@@ -16,7 +16,7 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: head/sys/sparc64/conf/GENERIC 234348 2012-04-16 18:29:07Z marius $
+# $FreeBSD: head/sys/sparc64/conf/GENERIC 237842 2012-06-30 14:55:36Z marius $
cpu SUN4U
ident GENERIC
@@ -26,7 +26,7 @@
# Platforms supported
# At this time all platforms are supported, as-is.
-options SCHED_ULE # ULE scheduler
+options SCHED_4BSD # 4BSD scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
@@ -109,6 +109,7 @@
device isp # Qlogic family
device ispfw # Firmware module for Qlogic host adapters
device mpt # LSI-Logic MPT-Fusion
+device mps # LSI-Logic MPT-Fusion 2
device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D
# ATA/SCSI peripherals
@@ -118,7 +119,7 @@
device sa # Sequential Access (tape etc)
device cd # CD
device pass # Passthrough device (direct ATA/SCSI access)
-device ses # SCSI Environmental Services (and SAF-TE)
+device ses # Enclosure Services (SES and SAF-TE)
device ctl # CAM Target Layer
# RAID controllers
@@ -234,35 +235,8 @@
device ohci # OHCI PCI->USB interface
device ehci # EHCI PCI->USB interface (USB 2.0)
device usb # USB Bus (required)
-#device udbp # USB Double Bulk Pipe devices (needs netgraph)
-device uhid # "Human Interface Devices"
device ukbd # Keyboard
-device ulpt # Printer
device umass # Disks/Mass storage - Requires scbus and da
-device ums # Mouse
-device urio # Diamond Rio 500 MP3 player
-# USB Serial devices
-device uark # Technologies ARK3116 based serial adapters
-device ubsa # Belkin F5U103 and compatible serial adapters
-device uftdi # For FTDI usb serial adapters
-device uipaq # Some WinCE based devices
-device uplcom # Prolific PL-2303 serial adapters
-device uslcom # SI Labs CP2101/CP2102 serial adapters
-device uvisor # Visor and Palm devices
-device uvscom # USB serial support for DDI pocket's PHS
-# USB Ethernet, requires miibus
-device aue # ADMtek USB Ethernet
-device axe # ASIX Electronics USB Ethernet
-device cdce # Generic USB over Ethernet
-device cue # CATC USB Ethernet
-device kue # Kawasaki LSI USB Ethernet
-device rue # RealTek RTL8150 USB Ethernet
-device udav # Davicom DM9601E USB
-# USB Wireless
-device rum # Ralink Technology RT2501USB wireless NICs
-device uath # Atheros AR5523 wireless NICs
-device ural # Ralink Technology RT2500USB wireless NICs
-device zyd # ZyDAS zd1211/zd1211b wireless NICs
# FireWire support
device firewire # FireWire bus code
@@ -278,4 +252,3 @@
device snd_audiocs # Crystal Semiconductor CS4231
device snd_es137x # Ensoniq AudioPCI ES137x
device snd_t4dwave # Acer Labs M5451
-device snd_uaudio # USB Audio
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/_stdint.h
--- a/head/sys/sparc64/include/_stdint.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/_stdint.h Wed Jul 25 16:40:53 2012 +0300
@@ -27,7 +27,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/sparc64/include/_stdint.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__STDINT_H_
@@ -149,12 +149,6 @@
/* Limit of size_t. */
#define SIZE_MAX UINT64_MAX
-#ifndef WCHAR_MIN /* Also possibly defined in <wchar.h> */
-/* Limits of wchar_t. */
-#define WCHAR_MIN INT32_MIN
-#define WCHAR_MAX INT32_MAX
-#endif
-
/* Limits of wint_t. */
#define WINT_MIN INT32_MIN
#define WINT_MAX INT32_MAX
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/_types.h
--- a/head/sys/sparc64/include/_types.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/_types.h Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
*
* From: @(#)ansi.h 8.2 (Berkeley) 1/4/94
* From: @(#)types.h 8.3 (Berkeley) 1/5/94
- * $FreeBSD: head/sys/sparc64/include/_types.h 228469 2011-12-13 13:38:03Z ed $
+ * $FreeBSD: head/sys/sparc64/include/_types.h 237517 2012-06-24 04:15:58Z andrew $
*/
#ifndef _MACHINE__TYPES_H_
@@ -92,6 +92,10 @@
typedef __uint64_t __vm_paddr_t;
typedef __uint64_t __vm_pindex_t;
typedef __uint64_t __vm_size_t;
+typedef int __wchar_t;
+
+#define __WCHAR_MIN __INT_MIN /* min value for a wchar_t */
+#define __WCHAR_MAX __INT_MAX /* max value for a wchar_t */
/*
* Unusual type definitions.
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/elf.h
--- a/head/sys/sparc64/include/elf.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/elf.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/sparc64/include/elf.h 237430 2012-06-22 06:38:31Z kib $
*/
#ifndef _MACHINE_ELF_H_
@@ -90,6 +90,7 @@
#define AT_NCPUS 19 /* Number of CPUs. */
#define AT_PAGESIZES 20 /* Pagesizes. */
#define AT_PAGESIZESLEN 21 /* Number of pagesizes. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
#define AT_STACKPROT 23 /* Initial stack protection. */
#define AT_COUNT 24 /* Count of defined aux entry types. */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/in_cksum.h
--- a/head/sys/sparc64/include/in_cksum.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/in_cksum.h Wed Jul 25 16:40:53 2012 +0300
@@ -55,7 +55,7 @@
* from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
* from: FreeBSD: src/sys/alpha/include/in_cksum.h,v 1.5 2000/05/06
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/sparc64/include/in_cksum.h 235941 2012-05-24 22:00:48Z bz $
*/
#ifndef _MACHINE_IN_CKSUM_H_
@@ -65,6 +65,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline void
in_cksum_update(struct ip *ip)
{
@@ -73,6 +74,7 @@
__tmp = (int)ip->ip_sum + 1;
ip->ip_sum = __tmp + (__tmp >> 16);
}
+#endif
static __inline u_short
in_addword(u_short sum, u_short b)
@@ -106,6 +108,7 @@
return (sum);
}
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline u_int
in_cksum_hdr(struct ip *ip)
{
@@ -163,6 +166,7 @@
#undef __LD_ADD
return (__ret);
}
+#endif
#ifdef _KERNEL
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/intr_machdep.h
--- a/head/sys/sparc64/include/intr_machdep.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/intr_machdep.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/sparc64/include/intr_machdep.h 235232 2012-05-10 15:23:20Z marius $
*/
#ifndef _MACHINE_INTR_MACHDEP_H_
@@ -91,10 +91,10 @@
extern ih_func_t *intr_handlers[];
extern struct intr_vector intr_vectors[];
+void intr_add_cpu(u_int cpu);
#ifdef SMP
-void intr_add_cpu(u_int cpu);
+int intr_bind(int vec, u_char cpu);
#endif
-int intr_bind(int vec, u_char cpu);
int intr_describe(int vec, void *ih, const char *descr);
void intr_setup(int level, ih_func_t *ihf, int pri, iv_func_t *ivf,
void *iva);
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/pcb.h
--- a/head/sys/sparc64/include/pcb.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/pcb.h Wed Jul 25 16:40:53 2012 +0300
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/sys/sparc64/include/pcb.h 234785 2012-04-29 11:04:31Z dim $
*/
#ifndef _MACHINE_PCB_H_
@@ -55,7 +55,7 @@
#ifdef _KERNEL
void makectx(struct trapframe *tf, struct pcb *pcb);
-int savectx(struct pcb *pcb);
+int savectx(struct pcb *pcb) __returns_twice;
#endif
#endif /* !LOCORE */
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/pmap.h
--- a/head/sys/sparc64/include/pmap.h Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/include/pmap.h Wed Jul 25 16:40:53 2012 +0300
@@ -33,7 +33,7 @@
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
* from: FreeBSD: src/sys/i386/include/pmap.h,v 1.70 2000/11/30
- * $FreeBSD: head/sys/sparc64/include/pmap.h 223800 2011-07-05 18:50:40Z marius $
+ * $FreeBSD: head/sys/sparc64/include/pmap.h 237168 2012-06-16 18:56:19Z alc $
*/
#ifndef _MACHINE_PMAP_H_
@@ -43,6 +43,7 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_rwlock.h>
#include <machine/cache.h>
#include <machine/tte.h>
@@ -79,6 +80,7 @@
#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx)
#define pmap_page_get_memattr(m) VM_MEMATTR_DEFAULT
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_page_set_memattr(m, ma) (void)0
void pmap_bootstrap(u_int cpu_impl);
@@ -101,6 +103,7 @@
extern struct pmap kernel_pmap_store;
#define kernel_pmap (&kernel_pmap_store)
+extern struct rwlock tte_list_global_lock;
extern vm_paddr_t phys_avail[];
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/include/vdso.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/sparc64/include/vdso.h Wed Jul 25 16:40:53 2012 +0300
@@ -0,0 +1,34 @@
+/*-
+ * Copyright 2012 Konstantin Belousov <kib at FreeBSD.ORG>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/sparc64/include/vdso.h 237433 2012-06-22 07:06:40Z kib $
+ */
+
+#ifndef _SPARC64_VDSO_H
+#define _SPARC64_VDSO_H
+
+#define VDSO_TIMEHANDS_MD \
+ uint32_t th_res[8];
+
+#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/sparc64/intr_machdep.c
--- a/head/sys/sparc64/sparc64/intr_machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/sparc64/intr_machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -59,7 +59,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/intr_machdep.c 234247 2012-04-13 22:58:23Z marius $");
+__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/intr_machdep.c 235231 2012-05-10 15:17:21Z marius $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -554,4 +554,11 @@
}
SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
NULL);
+#else /* !SMP */
+/* Use an empty stub for compatibility. */
+void
+intr_add_cpu(u_int cpu __unused)
+{
+
+}
#endif
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/sparc64/machdep.c
--- a/head/sys/sparc64/sparc64/machdep.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/sparc64/machdep.c Wed Jul 25 16:40:53 2012 +0300
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/machdep.c 234247 2012-04-13 22:58:23Z marius $");
+__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/machdep.c 234723 2012-04-26 20:24:25Z attilio $");
#include "opt_compat.h"
#include "opt_ddb.h"
@@ -197,12 +197,10 @@
cpu_identify(rdpr(ver), PCPU_GET(clock), curcpu);
-#ifdef SMP
/*
* Add BSP as an interrupt target.
*/
intr_add_cpu(0);
-#endif
}
void
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/sparc64/pmap.c
--- a/head/sys/sparc64/sparc64/pmap.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/sparc64/pmap.c Wed Jul 25 16:40:53 2012 +0300
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/pmap.c 230634 2012-01-27 23:25:24Z marius $");
+__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/pmap.c 237623 2012-06-27 03:45:25Z alc $");
/*
* Manages physical address maps.
@@ -71,6 +71,7 @@
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@@ -134,6 +135,11 @@
struct pmap kernel_pmap_store;
/*
+ * Global tte list lock
+ */
+struct rwlock tte_list_global_lock;
+
+/*
* Allocate physical memory for use in pmap_bootstrap.
*/
static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
@@ -666,6 +672,12 @@
pm->pm_context[i] = TLB_CTX_KERNEL;
CPU_FILL(&pm->pm_active);
+ /*
+ * Initialize the global tte list lock, which is more commonly
+ * known as the pmap pv global lock.
+ */
+ rw_init(&tte_list_global_lock, "pmap pv global");
+
/*
* Flush all non-locked TLB entries possibly left over by the
* firmware.
@@ -876,7 +888,7 @@
struct tte *tp;
int color;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
KASSERT((m->flags & PG_FICTITIOUS) == 0,
("pmap_cache_enter: fake page"));
PMAP_STATS_INC(pmap_ncache_enter);
@@ -951,7 +963,7 @@
struct tte *tp;
int color;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
m->md.colors[DCACHE_COLOR(va)]);
KASSERT((m->flags & PG_FICTITIOUS) == 0,
@@ -1026,7 +1038,7 @@
vm_page_t om;
u_long data;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
PMAP_STATS_INC(pmap_nkenter);
tp = tsb_kvtotte(va);
CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
@@ -1088,7 +1100,7 @@
struct tte *tp;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
PMAP_STATS_INC(pmap_nkremove);
tp = tsb_kvtotte(va);
CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
@@ -1139,19 +1151,16 @@
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
vm_offset_t va;
- int locked;
PMAP_STATS_INC(pmap_nqenter);
va = sva;
- if (!(locked = mtx_owned(&vm_page_queue_mtx)))
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
while (count-- > 0) {
pmap_kenter(va, *m);
va += PAGE_SIZE;
m++;
}
- if (!locked)
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
tlb_range_demap(kernel_pmap, sva, va);
}
@@ -1163,18 +1172,15 @@
pmap_qremove(vm_offset_t sva, int count)
{
vm_offset_t va;
- int locked;
PMAP_STATS_INC(pmap_nqremove);
va = sva;
- if (!(locked = mtx_owned(&vm_page_queue_mtx)))
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
while (count-- > 0) {
pmap_kremove(va);
va += PAGE_SIZE;
}
- if (!locked)
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
tlb_range_demap(kernel_pmap, sva, va);
}
@@ -1322,7 +1328,7 @@
vm_page_t m;
u_long data;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
data = atomic_readandclear_long(&tp->tte_data);
if ((data & TD_FAKE) == 0) {
m = PHYS_TO_VM_PAGE(TD_PA(data));
@@ -1359,7 +1365,7 @@
pm->pm_context[curcpu], start, end);
if (PMAP_REMOVE_DONE(pm))
return;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
PMAP_LOCK(pm);
if (end - start > PMAP_TSB_THRESH) {
tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
@@ -1372,7 +1378,7 @@
tlb_range_demap(pm, start, end - 1);
}
PMAP_UNLOCK(pm);
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
}
void
@@ -1385,7 +1391,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
tpn = TAILQ_NEXT(tp, tte_link);
if ((tp->tte_data & TD_PV) == 0)
@@ -1408,7 +1414,7 @@
PMAP_UNLOCK(pm);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
}
static int
@@ -1470,10 +1476,10 @@
vm_prot_t prot, boolean_t wired)
{
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
PMAP_LOCK(pm);
pmap_enter_locked(pm, va, m, prot, wired);
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1493,7 +1499,7 @@
vm_page_t real;
u_long data;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pm, MA_OWNED);
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
VM_OBJECT_LOCKED(m->object),
@@ -1636,14 +1642,14 @@
psize = atop(end - start);
m = m_start;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
PMAP_LOCK(pm);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
pmap_enter_locked(pm, start + ptoa(diff), m, prot &
(VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
m = TAILQ_NEXT(m, listq);
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1651,11 +1657,11 @@
pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
PMAP_LOCK(pm);
pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
FALSE);
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
PMAP_UNLOCK(pm);
}
@@ -1721,7 +1727,7 @@
if (dst_addr != src_addr)
return;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -1739,7 +1745,7 @@
pmap_copy_tte(src_pmap, dst_pmap, tp, va);
tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -1938,7 +1944,7 @@
("pmap_page_exists_quick: page %p is not managed", m));
loops = 0;
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -1949,7 +1955,7 @@
if (++loops >= 16)
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (rv);
}
@@ -1966,11 +1972,11 @@
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
count++;
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (count);
}
@@ -1997,13 +2003,13 @@
rv = FALSE;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (rv);
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
if ((tp->tte_data & TD_PV) != 0) {
rv = TRUE;
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (rv);
}
@@ -2029,7 +2035,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
count = 0;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
tpf = tp;
do {
@@ -2043,7 +2049,7 @@
break;
} while ((tp = tpn) != NULL && tp != tpf);
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (count);
}
@@ -2066,7 +2072,7 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return (rv);
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -2075,7 +2081,7 @@
break;
}
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (rv);
}
@@ -2109,7 +2115,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -2118,7 +2124,7 @@
break;
}
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
return (rv);
}
@@ -2141,7 +2147,7 @@
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -2149,7 +2155,7 @@
if ((data & TD_W) != 0)
tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
}
void
@@ -2160,7 +2166,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_reference: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -2168,7 +2174,7 @@
if ((data & TD_REF) != 0)
tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
}
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
}
void
@@ -2189,7 +2195,7 @@
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
if ((tp->tte_data & TD_PV) == 0)
continue;
@@ -2200,7 +2206,7 @@
}
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&tte_list_global_lock);
}
int
diff -r 58bcef12a717 -r fc630f3c8529 head/sys/sparc64/sparc64/tsb.c
--- a/head/sys/sparc64/sparc64/tsb.c Wed Jul 25 16:32:50 2012 +0300
+++ b/head/sys/sparc64/sparc64/tsb.c Wed Jul 25 16:40:53 2012 +0300
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/tsb.c 236214 2012-05-29 01:52:38Z alc $");
#include "opt_ddb.h"
#include "opt_pmap.h"
@@ -40,6 +40,7 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@@ -131,7 +132,7 @@
PMAP_STATS_INC(tsb_nenter_u_oc);
}
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&tte_list_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pm, MA_OWNED);
if (pm == kernel_pmap) {
PMAP_STATS_INC(tsb_nenter_k);
More information about the Zrouter-src-freebsd
mailing list