[Zrouter-src-freebsd] ZRouter.org: push to FreeBSD HEAD tree

zrouter-src-freebsd at zrouter.org zrouter-src-freebsd at zrouter.org
Wed Jul 25 14:36:28 UTC 2012


details:   http://zrouter.org/hg/FreeBSD/head//rev/8a9451a986e0
changeset: 503:8a9451a986e0
user:      Aleksandr Rybalko <ray at ddteam.net>
date:      Wed Jul 25 17:04:43 2012 +0300
description:
Lazy update

diffstat:

 head/sys/dev/aac/aac_disk.c                       |      9 +-
 head/sys/dev/acpi_support/acpi_asus_wmi.c         |    651 +
 head/sys/dev/acpi_support/acpi_ibm.c              |    459 +-
 head/sys/dev/acpica/Osd/OsdSchedule.c             |     16 +-
 head/sys/dev/acpica/Osd/OsdSynch.c                |      6 +-
 head/sys/dev/acpica/acpi.c                        |     55 +-
 head/sys/dev/acpica/acpi_cpu.c                    |    177 +-
 head/sys/dev/acpica/acpi_ec.c                     |     19 +-
 head/sys/dev/acpica/acpi_powerres.c               |      6 +-
 head/sys/dev/acpica/acpi_smbat.c                  |      3 +-
 head/sys/dev/acpica/acpi_video.c                  |     34 +-
 head/sys/dev/acpica/acpivar.h                     |     10 +-
 head/sys/dev/adb/adb_kbd.c                        |      4 +-
 head/sys/dev/ae/if_ae.c                           |     15 +-
 head/sys/dev/agp/agp.c                            |     17 +-
 head/sys/dev/agp/agp_i810.c                       |   2718 +-
 head/sys/dev/agp/agp_i810.h                       |    101 +
 head/sys/dev/agp/agp_if.m                         |     14 +-
 head/sys/dev/agp/agppriv.h                        |      4 +-
 head/sys/dev/agp/agpreg.h                         |     81 +-
 head/sys/dev/agp/agpvar.h                         |      8 +-
 head/sys/dev/ahci/ahci.c                          |     10 +-
 head/sys/dev/aic7xxx/aic79xx.c                    |     11 +-
 head/sys/dev/aic7xxx/aic79xx_osm.c                |     10 +-
 head/sys/dev/aic7xxx/aic7xxx.c                    |     11 +-
 head/sys/dev/aic7xxx/aic7xxx_osm.c                |     10 +-
 head/sys/dev/aic7xxx/aicasm/Makefile              |      5 +-
 head/sys/dev/aic7xxx/aicasm/aicasm.c              |      4 +-
 head/sys/dev/ata/ata-all.c                        |     10 +-
 head/sys/dev/ata/ata-lowlevel.c                   |     34 +-
 head/sys/dev/ata/chipsets/ata-ite.c               |      9 +-
 head/sys/dev/ata/chipsets/ata-via.c               |      6 +-
 head/sys/dev/atkbdc/atkbdc_isa.c                  |      3 +-
 head/sys/dev/bce/if_bce.c                         |    509 +-
 head/sys/dev/bce/if_bcereg.h                      |     41 +-
 head/sys/dev/bge/if_bge.c                         |     25 +-
 head/sys/dev/bge/if_bgereg.h                      |      6 +-
 head/sys/dev/bwi/bwimac.c                         |      3 +-
 head/sys/dev/bwi/bwiphy.c                         |      3 +-
 head/sys/dev/bwi/bwirf.c                          |      3 +-
 head/sys/dev/bwi/if_bwi.c                         |      3 +-
 head/sys/dev/bwi/if_bwi_pci.c                     |      4 +-
 head/sys/dev/cxgb/common/cxgb_ctl_defs.h          |     12 +-
 head/sys/dev/cxgb/cxgb_adapter.h                  |     53 +-
 head/sys/dev/cxgb/cxgb_main.c                     |    518 +-
 head/sys/dev/cxgb/cxgb_offload.c                  |    465 -
 head/sys/dev/cxgb/cxgb_offload.h                  |    257 +-
 head/sys/dev/cxgb/cxgb_osdep.h                    |     31 +-
 head/sys/dev/cxgb/cxgb_sge.c                      |    492 +-
 head/sys/dev/cxgb/sys/mvec.h                      |     23 +-
 head/sys/dev/cxgb/t3cdev.h                        |     62 -
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c           |    326 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h           |     45 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c        |    403 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h        |     19 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c        |     16 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c       |    152 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c        |     57 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c       |    517 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h       |     98 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h  |     22 +
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c       |     96 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c  |    411 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h  |     14 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c        |    417 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c  |     15 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h      |     10 +-
 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h        |     77 +-
 head/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h       |     49 -
 head/sys/dev/cxgb/ulp/toecore/toedev.c            |    420 -
 head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c           |   5163 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c       |   1034 -
 head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c              |    738 -
 head/sys/dev/cxgb/ulp/tom/cxgb_defs.h             |     91 -
 head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c              |    606 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_l2t.h              |    125 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_listen.c           |   1323 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h           |    181 -
 head/sys/dev/cxgb/ulp/tom/cxgb_tcp.h              |     47 -
 head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c      |     97 -
 head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h      |     14 -
 head/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h           |     71 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_tom.c              |   1717 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_tom.h              |    317 +-
 head/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c       |    140 -
 head/sys/dev/cxgbe/adapter.h                      |    110 +-
 head/sys/dev/cxgbe/common/common.h                |     30 +-
 head/sys/dev/cxgbe/common/t4_hw.c                 |    682 +-
 head/sys/dev/cxgbe/common/t4_msg.h                |    410 +-
 head/sys/dev/cxgbe/firmware/t4fw_cfg.txt          |     24 +-
 head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt    |     50 +-
 head/sys/dev/cxgbe/firmware/t4fw_interface.h      |   2103 +-
 head/sys/dev/cxgbe/offload.h                      |     21 +-
 head/sys/dev/cxgbe/osdep.h                        |      3 +-
 head/sys/dev/cxgbe/t4_l2t.c                       |    566 +-
 head/sys/dev/cxgbe/t4_l2t.h                       |     57 +-
 head/sys/dev/cxgbe/t4_main.c                      |    411 +-
 head/sys/dev/cxgbe/t4_sge.c                       |    286 +-
 head/sys/dev/cxgbe/tom/t4_connect.c               |    377 +
 head/sys/dev/cxgbe/tom/t4_cpl_io.c                |   1276 +
 head/sys/dev/cxgbe/tom/t4_listen.c                |   1362 +
 head/sys/dev/cxgbe/tom/t4_tom.c                   |    755 +
 head/sys/dev/cxgbe/tom/t4_tom.h                   |    248 +
 head/sys/dev/cxgbe/tom/t4_tom_l2t.c               |    405 +
 head/sys/dev/cxgbe/tom/t4_tom_l2t.h               |     53 +
 head/sys/dev/dc/if_dc.c                           |      7 +-
 head/sys/dev/dpt/dpt.h                            |      4 +-
 head/sys/dev/drm2/drm.h                           |   1214 +
 head/sys/dev/drm2/drmP.h                          |   1400 +
 head/sys/dev/drm2/drm_agpsupport.c                |    434 +
 head/sys/dev/drm2/drm_atomic.h                    |     93 +
 head/sys/dev/drm2/drm_auth.c                      |    190 +
 head/sys/dev/drm2/drm_bufs.c                      |   1130 +
 head/sys/dev/drm2/drm_context.c                   |    312 +
 head/sys/dev/drm2/drm_crtc.c                      |   3413 +
 head/sys/dev/drm2/drm_crtc.h                      |    935 +
 head/sys/dev/drm2/drm_crtc_helper.c               |   1043 +
 head/sys/dev/drm2/drm_crtc_helper.h               |    146 +
 head/sys/dev/drm2/drm_dma.c                       |    139 +
 head/sys/dev/drm2/drm_dp_helper.h                 |    250 +
 head/sys/dev/drm2/drm_dp_iic_helper.c             |    292 +
 head/sys/dev/drm2/drm_drawable.c                  |    173 +
 head/sys/dev/drm2/drm_drv.c                       |    980 +
 head/sys/dev/drm2/drm_edid.c                      |   1781 +
 head/sys/dev/drm2/drm_edid.h                      |    244 +
 head/sys/dev/drm2/drm_edid_modes.h                |    381 +
 head/sys/dev/drm2/drm_fb_helper.c                 |   1568 +
 head/sys/dev/drm2/drm_fb_helper.h                 |    141 +
 head/sys/dev/drm2/drm_fops.c                      |    202 +
 head/sys/dev/drm2/drm_fourcc.h                    |    139 +
 head/sys/dev/drm2/drm_gem.c                       |    487 +
 head/sys/dev/drm2/drm_gem_names.c                 |    211 +
 head/sys/dev/drm2/drm_gem_names.h                 |     64 +
 head/sys/dev/drm2/drm_hashtab.c                   |    181 +
 head/sys/dev/drm2/drm_hashtab.h                   |     68 +
 head/sys/dev/drm2/drm_internal.h                  |     43 +
 head/sys/dev/drm2/drm_ioctl.c                     |    320 +
 head/sys/dev/drm2/drm_irq.c                       |   1253 +
 head/sys/dev/drm2/drm_linux_list.h                |    177 +
 head/sys/dev/drm2/drm_linux_list_sort.c           |     75 +
 head/sys/dev/drm2/drm_lock.c                      |    199 +
 head/sys/dev/drm2/drm_memory.c                    |    127 +
 head/sys/dev/drm2/drm_mm.c                        |    563 +
 head/sys/dev/drm2/drm_mm.h                        |    185 +
 head/sys/dev/drm2/drm_mode.h                      |    444 +
 head/sys/dev/drm2/drm_modes.c                     |   1147 +
 head/sys/dev/drm2/drm_pci.c                       |    125 +
 head/sys/dev/drm2/drm_pciids.h                    |    770 +
 head/sys/dev/drm2/drm_sarea.h                     |     87 +
 head/sys/dev/drm2/drm_scatter.c                   |    129 +
 head/sys/dev/drm2/drm_sman.c                      |    352 +
 head/sys/dev/drm2/drm_sman.h                      |    181 +
 head/sys/dev/drm2/drm_stub.c                      |     60 +
 head/sys/dev/drm2/drm_sysctl.c                    |    364 +
 head/sys/dev/drm2/drm_vm.c                        |    134 +
 head/sys/dev/drm2/i915/i915_debug.c               |   1683 +
 head/sys/dev/drm2/i915/i915_dma.c                 |   2075 +
 head/sys/dev/drm2/i915/i915_drm.h                 |    971 +
 head/sys/dev/drm2/i915/i915_drv.c                 |    821 +
 head/sys/dev/drm2/i915/i915_drv.h                 |   1481 +
 head/sys/dev/drm2/i915/i915_gem.c                 |   3760 +
 head/sys/dev/drm2/i915/i915_gem_evict.c           |    213 +
 head/sys/dev/drm2/i915/i915_gem_execbuffer.c      |   1528 +
 head/sys/dev/drm2/i915/i915_gem_gtt.c             |    329 +
 head/sys/dev/drm2/i915/i915_gem_tiling.c          |    495 +
 head/sys/dev/drm2/i915/i915_irq.c                 |   2278 +
 head/sys/dev/drm2/i915/i915_reg.h                 |   3876 +
 head/sys/dev/drm2/i915/i915_suspend.c             |    909 +
 head/sys/dev/drm2/i915/intel_bios.c               |    737 +
 head/sys/dev/drm2/i915/intel_bios.h               |    620 +
 head/sys/dev/drm2/i915/intel_crt.c                |    624 +
 head/sys/dev/drm2/i915/intel_display.c            |   9532 ++++
 head/sys/dev/drm2/i915/intel_dp.c                 |   2562 +
 head/sys/dev/drm2/i915/intel_drv.h                |    428 +
 head/sys/dev/drm2/i915/intel_fb.c                 |    270 +
 head/sys/dev/drm2/i915/intel_hdmi.c               |    576 +
 head/sys/dev/drm2/i915/intel_iic.c                |    716 +
 head/sys/dev/drm2/i915/intel_lvds.c               |   1125 +
 head/sys/dev/drm2/i915/intel_modes.c              |    143 +
 head/sys/dev/drm2/i915/intel_opregion.c           |    550 +
 head/sys/dev/drm2/i915/intel_overlay.c            |   1582 +
 head/sys/dev/drm2/i915/intel_panel.c              |    327 +
 head/sys/dev/drm2/i915/intel_ringbuffer.c         |   1623 +
 head/sys/dev/drm2/i915/intel_ringbuffer.h         |    203 +
 head/sys/dev/drm2/i915/intel_sdvo.c               |   2680 +
 head/sys/dev/drm2/i915/intel_sdvo_regs.h          |    725 +
 head/sys/dev/drm2/i915/intel_sprite.c             |    669 +
 head/sys/dev/drm2/i915/intel_tv.c                 |   1609 +
 head/sys/dev/e1000/e1000_82541.c                  |      4 +-
 head/sys/dev/e1000/e1000_82543.c                  |     10 +-
 head/sys/dev/e1000/e1000_82571.c                  |      4 +-
 head/sys/dev/e1000/e1000_82575.c                  |    197 +-
 head/sys/dev/e1000/e1000_api.c                    |     19 +-
 head/sys/dev/e1000/e1000_api.h                    |      6 +-
 head/sys/dev/e1000/e1000_defines.h                |     40 +-
 head/sys/dev/e1000/e1000_hw.h                     |     21 +-
 head/sys/dev/e1000/e1000_i210.c                   |    740 +
 head/sys/dev/e1000/e1000_i210.h                   |     80 +
 head/sys/dev/e1000/e1000_ich8lan.c                |      4 +-
 head/sys/dev/e1000/e1000_mac.c                    |    604 +-
 head/sys/dev/e1000/e1000_mac.h                    |     14 +-
 head/sys/dev/e1000/e1000_manage.c                 |    326 +-
 head/sys/dev/e1000/e1000_manage.h                 |     62 +-
 head/sys/dev/e1000/e1000_phy.c                    |   1057 +-
 head/sys/dev/e1000/e1000_phy.h                    |     26 +-
 head/sys/dev/e1000/e1000_regs.h                   |     77 +-
 head/sys/dev/e1000/if_em.c                        |     37 +-
 head/sys/dev/e1000/if_igb.c                       |    178 +-
 head/sys/dev/e1000/if_igb.h                       |      4 +-
 head/sys/dev/esp/ncr53c9x.c                       |      6 +-
 head/sys/dev/etherswitch/arswitch/arswitch.c      |    577 +
 head/sys/dev/etherswitch/arswitch/arswitch_7240.c |    142 +
 head/sys/dev/etherswitch/arswitch/arswitch_7240.h |     35 +
 head/sys/dev/etherswitch/arswitch/arswitch_8216.c |     92 +
 head/sys/dev/etherswitch/arswitch/arswitch_8216.h |     34 +
 head/sys/dev/etherswitch/arswitch/arswitch_8226.c |     92 +
 head/sys/dev/etherswitch/arswitch/arswitch_8226.h |     34 +
 head/sys/dev/etherswitch/arswitch/arswitch_8316.c |    151 +
 head/sys/dev/etherswitch/arswitch/arswitch_8316.h |     34 +
 head/sys/dev/etherswitch/arswitch/arswitch_phy.c  |    136 +
 head/sys/dev/etherswitch/arswitch/arswitch_phy.h  |     34 +
 head/sys/dev/etherswitch/arswitch/arswitch_reg.c  |    180 +
 head/sys/dev/etherswitch/arswitch/arswitch_reg.h  |     44 +
 head/sys/dev/etherswitch/arswitch/arswitchreg.h   |    291 +
 head/sys/dev/etherswitch/arswitch/arswitchvar.h   |     92 +
 head/sys/dev/etherswitch/etherswitch.c            |    257 +
 head/sys/dev/etherswitch/etherswitch.h            |     70 +
 head/sys/dev/etherswitch/etherswitch_if.m         |     86 +
 head/sys/dev/etherswitch/mdio.c                   |    117 +
 head/sys/dev/etherswitch/mdio.h                   |     35 +
 head/sys/dev/etherswitch/mdio_if.m                |     24 +
 head/sys/dev/etherswitch/miiproxy.c               |    444 +
 head/sys/dev/etherswitch/miiproxy.h               |     37 +
 head/sys/dev/etherswitch/rtl8366/rtl8366rb.c      |    755 +
 head/sys/dev/etherswitch/rtl8366/rtl8366rbvar.h   |    176 +
 head/sys/dev/fb/fbreg.h                           |     16 +-
 head/sys/dev/fdt/fdt_common.h                     |      3 +-
 head/sys/dev/fdt/fdt_pci.c                        |     10 +-
 head/sys/dev/fdt/fdt_slicer.c                     |    115 +
 head/sys/dev/fdt/fdtbus.c                         |     73 +-
 head/sys/dev/fdt/simplebus.c                      |     47 +-
 head/sys/dev/filemon/filemon.c                    |    377 +
 head/sys/dev/filemon/filemon.h                    |     34 +
 head/sys/dev/filemon/filemon_lock.c               |    122 +
 head/sys/dev/filemon/filemon_wrapper.c            |    742 +
 head/sys/dev/firewire/sbp_targ.c                  |     12 +-
 head/sys/dev/fxp/if_fxp.c                         |      9 +-
 head/sys/dev/gpio/zwmux.c                         |    666 +
 head/sys/dev/gxemul/cons/gxemul_cons.c            |    334 +
 head/sys/dev/gxemul/disk/gxemul_disk.c            |    315 +
 head/sys/dev/gxemul/disk/gxemul_diskreg.h         |     57 +
 head/sys/dev/gxemul/ether/gxreg.h                 |     56 +
 head/sys/dev/gxemul/ether/if_gx.c                 |    398 +
 head/sys/dev/hptiop/hptiop.c                      |      3 +-
 head/sys/dev/hptmv/entry.c                        |      3 +-
 head/sys/dev/hptrr/hptrr_osm_bsd.c                |      4 +-
 head/sys/dev/hwpmc/hwpmc_arm.c                    |    109 +-
 head/sys/dev/hwpmc/hwpmc_core.c                   |      5 +-
 head/sys/dev/hwpmc/hwpmc_intel.c                  |     15 +-
 head/sys/dev/hwpmc/hwpmc_mips.c                   |      6 +-
 head/sys/dev/hwpmc/hwpmc_powerpc.c                |      6 +-
 head/sys/dev/isci/isci.h                          |     34 +-
 head/sys/dev/isci/isci_controller.c               |     49 +-
 head/sys/dev/isci/isci_interrupt.c                |     12 +-
 head/sys/dev/isci/isci_io_request.c               |     53 +-
 head/sys/dev/isci/isci_remote_device.c            |     22 +-
 head/sys/dev/isci/scil/sati_inquiry.c             |      4 +-
 head/sys/dev/isp/isp.c                            |    849 +-
 head/sys/dev/isp/isp_freebsd.c                    |    224 +-
 head/sys/dev/isp/isp_freebsd.h                    |     29 +-
 head/sys/dev/isp/isp_pci.c                        |     40 +-
 head/sys/dev/isp/isp_sbus.c                       |      4 +-
 head/sys/dev/isp/ispmbox.h                        |     32 +-
 head/sys/dev/isp/ispreg.h                         |     21 +-
 head/sys/dev/isp/ispvar.h                         |     25 +-
 head/sys/dev/ispfw/asm_2400.h                     |  46544 ++++++++++---------
 head/sys/dev/ispfw/asm_2500.h                     |  39521 ++++++++--------
 head/sys/dev/iwn/if_iwn.c                         |    110 +-
 head/sys/dev/ixgbe/ixgbe.c                        |    194 +-
 head/sys/dev/ixgbe/ixgbe.h                        |      4 +-
 head/sys/dev/ixgbe/ixgbe_82598.c                  |    102 +-
 head/sys/dev/ixgbe/ixgbe_82598.h                  |      4 +-
 head/sys/dev/ixgbe/ixgbe_82599.c                  |     14 +-
 head/sys/dev/ixgbe/ixgbe_api.c                    |     23 +-
 head/sys/dev/ixgbe/ixgbe_api.h                    |      5 +-
 head/sys/dev/ixgbe/ixgbe_common.c                 |    845 +-
 head/sys/dev/ixgbe/ixgbe_common.h                 |      9 +-
 head/sys/dev/ixgbe/ixgbe_osdep.h                  |     11 +-
 head/sys/dev/ixgbe/ixgbe_phy.c                    |     67 +-
 head/sys/dev/ixgbe/ixgbe_type.h                   |     97 +-
 head/sys/dev/ixgbe/ixgbe_vf.c                     |     24 +-
 head/sys/dev/ixgbe/ixgbe_x540.c                   |      8 +-
 head/sys/dev/ixgbe/ixv.c                          |     10 +-
 head/sys/dev/jme/if_jme.c                         |      4 +-
 head/sys/dev/md/md.c                              |     74 +-
 head/sys/dev/mfi/mfi.c                            |     56 +-
 head/sys/dev/mfi/mfi_cam.c                        |      6 +-
 head/sys/dev/mfi/mfi_debug.c                      |      5 +-
 head/sys/dev/mfi/mfi_disk.c                       |     24 +-
 head/sys/dev/mfi/mfi_tbolt.c                      |    345 +-
 head/sys/dev/mfi/mfireg.h                         |     12 +-
 head/sys/dev/mfi/mfivar.h                         |     20 +-
 head/sys/dev/mmc/mmc.c                            |    194 +-
 head/sys/dev/mmc/mmcsd.c                          |     13 +-
 head/sys/dev/mps/mpi/mpi2.h                       |      6 +-
 head/sys/dev/mps/mpi/mpi2_cnfg.h                  |      6 +-
 head/sys/dev/mps/mpi/mpi2_hbd.h                   |      6 +-
 head/sys/dev/mps/mpi/mpi2_history.txt             |      6 +-
 head/sys/dev/mps/mpi/mpi2_init.h                  |      6 +-
 head/sys/dev/mps/mpi/mpi2_ioc.h                   |      6 +-
 head/sys/dev/mps/mpi/mpi2_ra.h                    |      6 +-
 head/sys/dev/mps/mpi/mpi2_raid.h                  |      6 +-
 head/sys/dev/mps/mpi/mpi2_sas.h                   |      6 +-
 head/sys/dev/mps/mpi/mpi2_targ.h                  |      6 +-
 head/sys/dev/mps/mpi/mpi2_tool.h                  |      6 +-
 head/sys/dev/mps/mpi/mpi2_type.h                  |      6 +-
 head/sys/dev/mps/mps.c                            |    409 +-
 head/sys/dev/mps/mps_config.c                     |      5 +-
 head/sys/dev/mps/mps_ioctl.h                      |      6 +-
 head/sys/dev/mps/mps_mapping.c                    |      4 +-
 head/sys/dev/mps/mps_mapping.h                    |      4 +-
 head/sys/dev/mps/mps_sas.c                        |    336 +-
 head/sys/dev/mps/mps_sas.h                        |      4 +-
 head/sys/dev/mps/mps_sas_lsi.c                    |    153 +-
 head/sys/dev/mps/mps_user.c                       |     52 +-
 head/sys/dev/mps/mpsvar.h                         |     70 +-
 head/sys/dev/mpt/mpt_cam.c                        |     38 +-
 head/sys/dev/msk/if_msk.c                         |      5 +-
 head/sys/dev/mvs/mvs.c                            |     47 +-
 head/sys/dev/mvs/mvs.h                            |      7 +-
 head/sys/dev/mvs/mvs_soc.c                        |      4 +-
 head/sys/dev/mxge/eth_z8e.h                       |  15038 +++---
 head/sys/dev/mxge/ethp_z8e.h                      |  15121 +++---
 head/sys/dev/mxge/rss_eth_z8e.h                   |  19372 ++++----
 head/sys/dev/mxge/rss_ethp_z8e.h                  |  19500 ++++----
 head/sys/dev/nand/nand.c                          |    834 +
 head/sys/dev/nand/nand.h                          |    385 +
 head/sys/dev/nand/nand_bbt.c                      |    273 +
 head/sys/dev/nand/nand_cdev.c                     |    413 +
 head/sys/dev/nand/nand_dev.h                      |     90 +
 head/sys/dev/nand/nand_ecc_pos.h                  |     56 +
 head/sys/dev/nand/nand_generic.c                  |   1320 +
 head/sys/dev/nand/nand_geom.c                     |    414 +
 head/sys/dev/nand/nand_id.c                       |     60 +
 head/sys/dev/nand/nand_if.m                       |    168 +
 head/sys/dev/nand/nandbus.c                       |    530 +
 head/sys/dev/nand/nandbus.h                       |     49 +
 head/sys/dev/nand/nandbus_if.m                    |    100 +
 head/sys/dev/nand/nandsim.c                       |    667 +
 head/sys/dev/nand/nandsim.h                       |    175 +
 head/sys/dev/nand/nandsim_chip.c                  |    901 +
 head/sys/dev/nand/nandsim_chip.h                  |    159 +
 head/sys/dev/nand/nandsim_ctrl.c                  |    396 +
 head/sys/dev/nand/nandsim_log.c                   |    186 +
 head/sys/dev/nand/nandsim_log.h                   |     52 +
 head/sys/dev/nand/nandsim_swap.c                  |    389 +
 head/sys/dev/nand/nandsim_swap.h                  |     64 +
 head/sys/dev/nand/nfc_fsl.c                       |    716 +
 head/sys/dev/nand/nfc_fsl.h                       |     97 +
 head/sys/dev/nand/nfc_if.m                        |    165 +
 head/sys/dev/nand/nfc_mv.c                        |    236 +
 head/sys/dev/netmap/head.diff                     |    654 -
 head/sys/dev/netmap/netmap.c                      |      5 +-
 head/sys/dev/pccard/pccard.c                      |     44 +-
 head/sys/dev/pccard/pccardvarp.h                  |     25 +-
 head/sys/dev/pci/pci.c                            |      6 +-
 head/sys/dev/pci/pci_pci.c                        |     27 +-
 head/sys/dev/pci/vga_pci.c                        |      3 +-
 head/sys/dev/powermac_nvram/powermac_nvram.c      |      5 +-
 head/sys/dev/puc/puc_cfg.h                        |      4 +-
 head/sys/dev/puc/pucdata.c                        |     49 +-
 head/sys/dev/qlxgb/qla_os.c                       |      3 +-
 head/sys/dev/re/if_re.c                           |     15 +-
 head/sys/dev/sdhci/sdhci.c                        |      4 +-
 head/sys/dev/sec/sec.c                            |      6 +-
 head/sys/dev/sec/sec.h                            |      3 +-
 head/sys/dev/sf/if_sf.c                           |      4 +-
 head/sys/dev/siis/siis.c                          |      6 +-
 head/sys/dev/sio/sio.c                            |      7 +-
 head/sys/dev/sound/pci/hda/hdaa.c                 |     14 +-
 head/sys/dev/sound/pci/hda/hdaa_patches.c         |     11 +-
 head/sys/dev/sound/pci/hdspe.c                    |      8 +-
 head/sys/dev/sound/pcm/sndstat.c                  |     81 +-
 head/sys/dev/sym/sym_conf.h                       |     16 +-
 head/sys/dev/sym/sym_hipd.c                       |     99 +-
 head/sys/dev/tsec/if_tsec.c                       |     19 +-
 head/sys/dev/twa/tw_osl_cam.c                     |      6 +-
 head/sys/dev/viawd/viawd.c                        |     16 +-
 head/sys/dev/virtio/balloon/virtio_balloon.c      |      3 +-
 head/sys/dev/virtio/balloon/virtio_balloon.h      |      4 +-
 head/sys/dev/virtio/block/virtio_blk.c            |     83 +-
 head/sys/dev/virtio/block/virtio_blk.h            |      4 +-
 head/sys/dev/virtio/network/if_vtnet.c            |      4 +-
 head/sys/dev/virtio/network/virtio_net.h          |      4 +-
 head/sys/dev/virtio/pci/virtio_pci.c              |    643 +-
 head/sys/dev/virtio/pci/virtio_pci.h              |      4 +-
 head/sys/dev/virtio/virtio.c                      |     39 +-
 head/sys/dev/virtio/virtio.h                      |     37 +-
 head/sys/dev/virtio/virtio_ring.h                 |     13 +-
 head/sys/dev/virtio/virtqueue.c                   |     89 +-
 head/sys/dev/virtio/virtqueue.h                   |     10 +-
 head/sys/dev/vr/if_vr.c                           |     45 +-
 head/sys/dev/vr/if_vrreg.h                        |      3 +-
 head/sys/dev/vxge/vxgehal/vxgehal-channel.h       |      4 +-
 head/sys/dev/wbwd/wbwd.c                          |     12 +-
 head/sys/dev/wpi/if_wpi.c                         |      4 +-
 head/sys/dev/xen/balloon/balloon.c                |      5 +-
 head/sys/dev/xen/blkfront/blkfront.c              |      4 +-
 head/sys/dev/xl/if_xl.c                           |      7 +-
 409 files changed, 189281 insertions(+), 95961 deletions(-)

diffs (313760 lines):

diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aac/aac_disk.c
--- a/head/sys/dev/aac/aac_disk.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aac/aac_disk.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/aac/aac_disk.c 238601 2012-07-18 18:10:27Z sbruno $");
 
 #include "opt_aac.h"
 
@@ -332,13 +332,12 @@
 void
 aac_biodone(struct bio *bp)
 {
-	struct aac_disk	*sc;
-
-	sc = (struct aac_disk *)bp->bio_disk->d_drv1;
 	fwprintf(NULL, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "");
 
-	if (bp->bio_flags & BIO_ERROR)
+	if (bp->bio_flags & BIO_ERROR) {
+		bp->bio_resid = bp->bio_bcount;
 		disk_err(bp, "hard error", -1, 1);
+	}
 
 	biodone(bp);
 }
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpi_support/acpi_asus_wmi.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/dev/acpi_support/acpi_asus_wmi.c	Wed Jul 25 17:04:43 2012 +0300
@@ -0,0 +1,651 @@
+/*-
+ * Copyright (c) 2012 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/dev/acpi_support/acpi_asus_wmi.c 237981 2012-07-02 08:31:29Z mav $");
+
+#include "opt_acpi.h"
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/sbuf.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include "acpi_wmi_if.h"
+
+#define _COMPONENT	ACPI_OEM
+ACPI_MODULE_NAME("ASUS-WMI")
+
+#define ACPI_ASUS_WMI_MGMT_GUID 	"97845ED0-4E6D-11DE-8A39-0800200C9A66"
+#define ACPI_ASUS_WMI_EVENT_GUID	"0B3CBB35-E3C2-45ED-91C2-4C5A6D195D1C"
+#define ACPI_EEEPC_WMI_EVENT_GUID	"ABBC0F72-8EA1-11D1-00A0-C90629100000"
+
+/* WMI Methods */
+#define ASUS_WMI_METHODID_SPEC          0x43455053
+#define ASUS_WMI_METHODID_SFUN          0x4E554653
+#define ASUS_WMI_METHODID_DSTS          0x53544344
+#define ASUS_WMI_METHODID_DSTS2         0x53545344
+#define ASUS_WMI_METHODID_DEVS          0x53564544
+#define ASUS_WMI_METHODID_INIT          0x54494E49
+#define ASUS_WMI_METHODID_HKEY          0x59454B48
+
+#define ASUS_WMI_UNSUPPORTED_METHOD     0xFFFFFFFE
+
+/* Wireless */
+#define ASUS_WMI_DEVID_HW_SWITCH        0x00010001
+#define ASUS_WMI_DEVID_WIRELESS_LED     0x00010002
+#define ASUS_WMI_DEVID_CWAP             0x00010003
+#define ASUS_WMI_DEVID_WLAN             0x00010011
+#define ASUS_WMI_DEVID_BLUETOOTH        0x00010013
+#define ASUS_WMI_DEVID_GPS              0x00010015
+#define ASUS_WMI_DEVID_WIMAX            0x00010017
+#define ASUS_WMI_DEVID_WWAN3G           0x00010019
+#define ASUS_WMI_DEVID_UWB              0x00010021
+
+/* LEDs */
+#define ASUS_WMI_DEVID_LED1             0x00020011
+#define ASUS_WMI_DEVID_LED2             0x00020012
+#define ASUS_WMI_DEVID_LED3             0x00020013
+#define ASUS_WMI_DEVID_LED4             0x00020014
+#define ASUS_WMI_DEVID_LED5             0x00020015
+#define ASUS_WMI_DEVID_LED6             0x00020016
+
+/* Backlight and Brightness */
+#define ASUS_WMI_DEVID_BACKLIGHT        0x00050011
+#define ASUS_WMI_DEVID_BRIGHTNESS       0x00050012
+#define ASUS_WMI_DEVID_KBD_BACKLIGHT    0x00050021
+#define ASUS_WMI_DEVID_LIGHT_SENSOR     0x00050022
+
+/* Misc */
+#define ASUS_WMI_DEVID_CAMERA           0x00060013
+#define ASUS_WMI_DEVID_CARDREADER       0x00080013
+#define ASUS_WMI_DEVID_TOUCHPAD         0x00100011
+#define ASUS_WMI_DEVID_TOUCHPAD_LED     0x00100012
+#define ASUS_WMI_DEVID_THERMAL_CTRL     0x00110011
+#define ASUS_WMI_DEVID_FAN_CTRL         0x00110012
+#define ASUS_WMI_DEVID_PROCESSOR_STATE  0x00120012
+
+/* DSTS masks */
+#define ASUS_WMI_DSTS_STATUS_BIT        0x00000001
+#define ASUS_WMI_DSTS_UNKNOWN_BIT       0x00000002
+#define ASUS_WMI_DSTS_PRESENCE_BIT      0x00010000
+#define ASUS_WMI_DSTS_USER_BIT          0x00020000
+#define ASUS_WMI_DSTS_BIOS_BIT          0x00040000
+#define ASUS_WMI_DSTS_BRIGHTNESS_MASK   0x000000FF
+#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK   0x0000FF00
+
+
+struct acpi_asus_wmi_softc {
+	device_t	dev;
+	device_t	wmi_dev;
+	const char	*notify_guid;
+	struct sysctl_ctx_list	*sysctl_ctx;
+	struct sysctl_oid	*sysctl_tree;
+	int		dsts_id;
+	int		handle_keys;
+};
+
+static struct {
+	char	*name;
+	int	dev_id;
+	char	*description;
+	int	access;
+} acpi_asus_wmi_sysctls[] = {
+	{
+		.name		= "hw_switch",
+		.dev_id		= ASUS_WMI_DEVID_HW_SWITCH,
+		.description	= "hw_switch",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "wireless_led",
+		.dev_id		= ASUS_WMI_DEVID_WIRELESS_LED,
+		.description	= "Wireless LED control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "cwap",
+		.dev_id		= ASUS_WMI_DEVID_CWAP,
+		.description	= "Alt+F2 function",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "wlan",
+		.dev_id		= ASUS_WMI_DEVID_WLAN,
+		.description	= "WLAN power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "bluetooth",
+		.dev_id		= ASUS_WMI_DEVID_BLUETOOTH,
+		.description	= "Bluetooth power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "gps",
+		.dev_id		= ASUS_WMI_DEVID_GPS,
+		.description	= "GPS power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "wimax",
+		.dev_id		= ASUS_WMI_DEVID_WIMAX,
+		.description	= "WiMAX power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "wwan3g",
+		.dev_id		= ASUS_WMI_DEVID_WWAN3G,
+		.description	= "WWAN-3G power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "uwb",
+		.dev_id		= ASUS_WMI_DEVID_UWB,
+		.description	= "UWB power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led1",
+		.dev_id		= ASUS_WMI_DEVID_LED1,
+		.description	= "LED1 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led2",
+		.dev_id		= ASUS_WMI_DEVID_LED2,
+		.description	= "LED2 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led3",
+		.dev_id		= ASUS_WMI_DEVID_LED3,
+		.description	= "LED3 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led4",
+		.dev_id		= ASUS_WMI_DEVID_LED4,
+		.description	= "LED4 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led5",
+		.dev_id		= ASUS_WMI_DEVID_LED5,
+		.description	= "LED5 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "led6",
+		.dev_id		= ASUS_WMI_DEVID_LED6,
+		.description	= "LED6 control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "backlight",
+		.dev_id		= ASUS_WMI_DEVID_BACKLIGHT,
+		.description	= "LCD backlight on/off control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "brightness",
+		.dev_id		= ASUS_WMI_DEVID_BRIGHTNESS,
+		.description	= "LCD backlight brightness control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "kbd_backlight",
+		.dev_id		= ASUS_WMI_DEVID_KBD_BACKLIGHT,
+		.description	= "Keyboard backlight brightness control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "light_sensor",
+		.dev_id		= ASUS_WMI_DEVID_LIGHT_SENSOR,
+		.description	= "Ambient light sensor",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "camera",
+		.dev_id		= ASUS_WMI_DEVID_CAMERA,
+		.description	= "Camera power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "cardreader",
+		.dev_id		= ASUS_WMI_DEVID_CARDREADER,
+		.description	= "Cardreader power control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "touchpad",
+		.dev_id		= ASUS_WMI_DEVID_TOUCHPAD,
+		.description	= "Touchpad control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "touchpad_led",
+		.dev_id		= ASUS_WMI_DEVID_TOUCHPAD_LED,
+		.description	= "Touchpad LED control",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{
+		.name		= "themperature",
+		.dev_id		= ASUS_WMI_DEVID_THERMAL_CTRL,
+		.description	= "Temperature (C)",
+		.access		= CTLTYPE_INT | CTLFLAG_RD
+	},
+	{
+		.name		= "fan_speed",
+		.dev_id		= ASUS_WMI_DEVID_FAN_CTRL,
+		.description	= "Fan speed (0-3)",
+		.access		= CTLTYPE_INT | CTLFLAG_RD
+	},
+	{
+		.name		= "processor_state",
+		.dev_id		= ASUS_WMI_DEVID_PROCESSOR_STATE,
+		.description	= "Processor state",
+		.access		= CTLTYPE_INT | CTLFLAG_RW
+	},
+	{ NULL, 0, NULL, 0 }
+};
+
+ACPI_SERIAL_DECL(asus_wmi, "ASUS WMI device");
+
+static void	acpi_asus_wmi_identify(driver_t *driver, device_t parent);
+static int	acpi_asus_wmi_probe(device_t dev);
+static int	acpi_asus_wmi_attach(device_t dev);
+static int	acpi_asus_wmi_detach(device_t dev);
+
+static int	acpi_asus_wmi_sysctl(SYSCTL_HANDLER_ARGS);
+static int	acpi_asus_wmi_sysctl_set(struct acpi_asus_wmi_softc *sc, int dev_id,
+		    int arg, int oldarg);
+static int	acpi_asus_wmi_sysctl_get(struct acpi_asus_wmi_softc *sc, int dev_id);
+static int	acpi_asus_wmi_evaluate_method(device_t wmi_dev, int method,
+		    UINT32 arg0, UINT32 arg1, UINT32 *retval);
+static int	acpi_wpi_asus_get_devstate(struct acpi_asus_wmi_softc *sc,
+		    UINT32 dev_id, UINT32 *retval);
+static int	acpi_wpi_asus_set_devstate(struct acpi_asus_wmi_softc *sc,
+		    UINT32 dev_id, UINT32 ctrl_param, UINT32 *retval);
+static void	acpi_asus_wmi_notify(ACPI_HANDLE h, UINT32 notify, void *context);
+
+static device_method_t acpi_asus_wmi_methods[] = {
+	DEVMETHOD(device_identify, acpi_asus_wmi_identify),
+	DEVMETHOD(device_probe, acpi_asus_wmi_probe),
+	DEVMETHOD(device_attach, acpi_asus_wmi_attach),
+	DEVMETHOD(device_detach, acpi_asus_wmi_detach),
+	{0, 0}
+};
+
+static driver_t	acpi_asus_wmi_driver = {
+	"acpi_asus_wmi",
+	acpi_asus_wmi_methods,
+	sizeof(struct acpi_asus_wmi_softc),
+};
+
+static devclass_t acpi_asus_wmi_devclass;
+
+DRIVER_MODULE(acpi_asus_wmi, acpi_wmi, acpi_asus_wmi_driver,
+    acpi_asus_wmi_devclass, 0, 0);
+MODULE_DEPEND(acpi_asus_wmi, acpi_wmi, 1, 1, 1);
+MODULE_DEPEND(acpi_asus_wmi, acpi, 1, 1, 1);
+
+static void
+acpi_asus_wmi_identify(driver_t *driver, device_t parent)
+{
+
+	/* Don't do anything if driver is disabled. */
+	if (acpi_disabled("asus_wmi"))
+		return;
+
+	/* Add only a single device instance. */
+	if (device_find_child(parent, "acpi_asus_wmi", -1) != NULL)
+		return;
+
+	/* Check management GUID to see whether system is compatible. */
+	if (!ACPI_WMI_PROVIDES_GUID_STRING(parent,
+	    ACPI_ASUS_WMI_MGMT_GUID))
+		return;
+
+	if (BUS_ADD_CHILD(parent, 0, "acpi_asus_wmi", -1) == NULL)
+		device_printf(parent, "add acpi_asus_wmi child failed\n");
+}
+
+static int
+acpi_asus_wmi_probe(device_t dev)
+{
+
+	if (!ACPI_WMI_PROVIDES_GUID_STRING(device_get_parent(dev),
+	    ACPI_ASUS_WMI_MGMT_GUID))
+		return (EINVAL);
+	device_set_desc(dev, "ASUS WMI device");
+	return (0);
+}
+
+static int
+acpi_asus_wmi_attach(device_t dev)
+{
+	struct acpi_asus_wmi_softc	*sc;
+	UINT32			val;
+	int			dev_id, i;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__);
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->wmi_dev = device_get_parent(dev);
+	sc->handle_keys = 1;
+
+	/* Check management GUID. */
+	if (!ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev,
+	    ACPI_ASUS_WMI_MGMT_GUID)) {
+		device_printf(dev,
+		    "WMI device does not provide the ASUS management GUID\n");
+		return (EINVAL);
+	}
+
+	/* Find proper DSTS method. */
+	sc->dsts_id = ASUS_WMI_METHODID_DSTS;
+next:
+	for (i = 0; acpi_asus_wmi_sysctls[i].name != NULL; ++i) {
+		dev_id = acpi_asus_wmi_sysctls[i].dev_id;
+		if (acpi_wpi_asus_get_devstate(sc, dev_id, &val))
+			continue;
+		break;
+	}
+	if (acpi_asus_wmi_sysctls[i].name == NULL) {
+		if (sc->dsts_id == ASUS_WMI_METHODID_DSTS) {
+			sc->dsts_id = ASUS_WMI_METHODID_DSTS2;
+			goto next;
+		} else {
+			device_printf(dev, "Can not detect DSTS method ID\n");
+			return (EINVAL);
+		}
+	}
+
+	/* Find and attach to the notify GUID. */
+	if (ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev,
+	    ACPI_ASUS_WMI_EVENT_GUID))
+		sc->notify_guid = ACPI_ASUS_WMI_EVENT_GUID;
+	else if (ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev,
+	    ACPI_EEEPC_WMI_EVENT_GUID))
+		sc->notify_guid = ACPI_EEEPC_WMI_EVENT_GUID;
+	else
+		sc->notify_guid = NULL;
+	if (sc->notify_guid != NULL) {
+		if (ACPI_WMI_INSTALL_EVENT_HANDLER(sc->wmi_dev,
+		    sc->notify_guid, acpi_asus_wmi_notify, dev))
+			sc->notify_guid = NULL;
+	}
+	if (sc->notify_guid == NULL)
+		device_printf(dev, "Could not install event handler!\n");
+
+	/* Initialize. */
+	if (!acpi_asus_wmi_evaluate_method(sc->wmi_dev,
+	    ASUS_WMI_METHODID_INIT, 0, 0, &val) && bootverbose)
+		device_printf(dev, "Initialization: %#x\n", val);
+	if (!acpi_asus_wmi_evaluate_method(sc->wmi_dev,
+	    ASUS_WMI_METHODID_SPEC, 0, 0x9, &val) && bootverbose)
+		device_printf(dev, "WMI BIOS version: %d.%d\n",
+		    val >> 16, val & 0xFF);
+	if (!acpi_asus_wmi_evaluate_method(sc->wmi_dev,
+	    ASUS_WMI_METHODID_SFUN, 0, 0, &val) && bootverbose)
+		device_printf(dev, "SFUN value: %#x\n", val);
+
+	ACPI_SERIAL_BEGIN(asus_wmi);
+
+	sc->sysctl_ctx = device_get_sysctl_ctx(dev);
+	sc->sysctl_tree = device_get_sysctl_tree(dev);
+	SYSCTL_ADD_INT(sc->sysctl_ctx,
+	    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO,
+	    "handle_keys", CTLFLAG_RW, &sc->handle_keys,
+	    0, "Handle some hardware keys inside the driver");
+	for (i = 0; acpi_asus_wmi_sysctls[i].name != NULL; ++i) {
+		dev_id = acpi_asus_wmi_sysctls[i].dev_id;
+		if (acpi_wpi_asus_get_devstate(sc, dev_id, &val))
+			continue;
+		switch (dev_id) {
+		case ASUS_WMI_DEVID_THERMAL_CTRL:
+		case ASUS_WMI_DEVID_PROCESSOR_STATE:
+		case ASUS_WMI_DEVID_FAN_CTRL:
+		case ASUS_WMI_DEVID_BRIGHTNESS:
+			if (val == 0)
+				continue;
+			break;
+		default:
+			if ((val & ASUS_WMI_DSTS_PRESENCE_BIT) == 0)
+				continue;
+			break;
+		}
+
+		SYSCTL_ADD_PROC(sc->sysctl_ctx,
+		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO,
+		    acpi_asus_wmi_sysctls[i].name,
+		    acpi_asus_wmi_sysctls[i].access,
+		    sc, i, acpi_asus_wmi_sysctl, "I",
+		    acpi_asus_wmi_sysctls[i].description);
+	}
+	ACPI_SERIAL_END(asus_wmi);
+
+	return (0);
+}
+
+static int
+acpi_asus_wmi_detach(device_t dev)
+{
+	struct acpi_asus_wmi_softc *sc = device_get_softc(dev);
+	
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__);
+
+	if (sc->notify_guid)
+		ACPI_WMI_REMOVE_EVENT_HANDLER(dev, sc->notify_guid);
+
+	return (0);
+}
+
+static int
+acpi_asus_wmi_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct acpi_asus_wmi_softc	*sc;
+	int			arg;
+	int			oldarg;
+	int			error = 0;
+	int			function;
+	int			dev_id;
+	
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+
+	sc = (struct acpi_asus_wmi_softc *)oidp->oid_arg1;
+	function = oidp->oid_arg2;
+	dev_id = acpi_asus_wmi_sysctls[function].dev_id;
+
+	ACPI_SERIAL_BEGIN(asus_wmi);
+	arg = acpi_asus_wmi_sysctl_get(sc, dev_id);
+	oldarg = arg;
+	error = sysctl_handle_int(oidp, &arg, 0, req);
+	if (!error && req->newptr != NULL)
+		error = acpi_asus_wmi_sysctl_set(sc, dev_id, arg, oldarg);
+	ACPI_SERIAL_END(asus_wmi);
+
+	return (error);
+}
+
+static int
+acpi_asus_wmi_sysctl_get(struct acpi_asus_wmi_softc *sc, int dev_id)
+{
+	UINT32	val = 0;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(asus_wmi);
+
+	acpi_wpi_asus_get_devstate(sc, dev_id, &val);
+
+	switch(dev_id) {
+	case ASUS_WMI_DEVID_THERMAL_CTRL:
+		val = (val - 2732 + 5) / 10;
+		break;
+	case ASUS_WMI_DEVID_PROCESSOR_STATE:
+	case ASUS_WMI_DEVID_FAN_CTRL:
+		break;
+	case ASUS_WMI_DEVID_BRIGHTNESS:
+		val &= ASUS_WMI_DSTS_BRIGHTNESS_MASK;
+		break;
+	case ASUS_WMI_DEVID_KBD_BACKLIGHT:
+		val &= 0x7;
+		break;
+	default:
+		if (val & ASUS_WMI_DSTS_UNKNOWN_BIT)
+			val = -1;
+		else
+			val = !!(val & ASUS_WMI_DSTS_STATUS_BIT);
+		break;
+	}
+
+	return (val);
+}
+
+static int
+acpi_asus_wmi_sysctl_set(struct acpi_asus_wmi_softc *sc, int dev_id, int arg, int oldarg)
+{
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(asus_wmi);
+
+	switch(dev_id) {
+	case ASUS_WMI_DEVID_KBD_BACKLIGHT:
+		arg = min(0x7, arg);
+		if (arg != 0)
+			arg |= 0x80;
+		break;
+	}
+
+	acpi_wpi_asus_set_devstate(sc, dev_id, arg, NULL);
+
+	return (0);
+}
+
+static __inline void
+acpi_asus_wmi_free_buffer(ACPI_BUFFER* buf) {
+	if (buf && buf->Pointer) {
+		AcpiOsFree(buf->Pointer);
+	}
+}
+
+static void
+acpi_asus_wmi_notify(ACPI_HANDLE h, UINT32 notify, void *context)
+{
+	device_t dev = context;
+	ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, notify);
+	UINT32 val;
+	int code = 0;
+
+	struct acpi_asus_wmi_softc *sc = device_get_softc(dev);
+	ACPI_BUFFER response = { ACPI_ALLOCATE_BUFFER, NULL };
+	ACPI_OBJECT *obj;
+	ACPI_WMI_GET_EVENT_DATA(sc->wmi_dev, notify, &response);
+	obj = (ACPI_OBJECT*) response.Pointer;
+	if (obj && obj->Type == ACPI_TYPE_INTEGER) {
+		code = obj->Integer.Value;
+		acpi_UserNotify("ASUS", ACPI_ROOT_OBJECT,
+		    code);
+	}
+	if (code && sc->handle_keys) {
+		/* Keyboard backlight control. */
+		if (code == 0xc4 || code == 0xc5) {
+			acpi_wpi_asus_get_devstate(sc,
+			    ASUS_WMI_DEVID_KBD_BACKLIGHT, &val);
+			val &= 0x7;
+			if (code == 0xc4) {
+				if (val < 0x7)
+					val++;
+			} else if (val > 0)
+				val--;
+			if (val != 0)
+				val |= 0x80;
+			acpi_wpi_asus_set_devstate(sc,
+			    ASUS_WMI_DEVID_KBD_BACKLIGHT, val, NULL);
+		}
+		/* Touchpad control. */
+		if (code == 0x6b) {
+			acpi_wpi_asus_get_devstate(sc,
+			    ASUS_WMI_DEVID_TOUCHPAD, &val);
+			val = !(val & 1);
+			acpi_wpi_asus_set_devstate(sc,
+			    ASUS_WMI_DEVID_TOUCHPAD, val, NULL);
+		}
+	}
+	acpi_asus_wmi_free_buffer(&response);
+}
+
+static int
+acpi_asus_wmi_evaluate_method(device_t wmi_dev, int method,
+    UINT32 arg0, UINT32 arg1, UINT32 *retval)
+{
+	UINT32		params[2] = { arg0, arg1 };
+	UINT32		result;
+	ACPI_OBJECT	*obj;
+	ACPI_BUFFER	in = { sizeof(params), &params };
+	ACPI_BUFFER	out = { ACPI_ALLOCATE_BUFFER, NULL };
+	
+	if (ACPI_FAILURE(ACPI_WMI_EVALUATE_CALL(wmi_dev,
+	    ACPI_ASUS_WMI_MGMT_GUID, 1, method, &in, &out))) {
+		acpi_asus_wmi_free_buffer(&out);
+		return (-EINVAL);
+	}
+	obj = out.Pointer;
+	if (obj && obj->Type == ACPI_TYPE_INTEGER)
+		result = (UINT32) obj->Integer.Value;
+	else
+		result = 0;
+	acpi_asus_wmi_free_buffer(&out);
+	if (retval)
+		*retval = result;
+	return (result == ASUS_WMI_UNSUPPORTED_METHOD ? -ENODEV : 0);
+}
+
+static int
+acpi_wpi_asus_get_devstate(struct acpi_asus_wmi_softc *sc,
+    UINT32 dev_id, UINT32 *retval)
+{
+
+	return (acpi_asus_wmi_evaluate_method(sc->wmi_dev,
+	    sc->dsts_id, dev_id, 0, retval));
+}
+
+static int
+acpi_wpi_asus_set_devstate(struct acpi_asus_wmi_softc *sc,
+    UINT32 dev_id, UINT32 ctrl_param, UINT32 *retval)
+{
+
+	return (acpi_asus_wmi_evaluate_method(sc->wmi_dev,
+	    ASUS_WMI_METHODID_DEVS, dev_id, ctrl_param, retval));
+}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpi_support/acpi_ibm.c
--- a/head/sys/dev/acpi_support/acpi_ibm.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpi_support/acpi_ibm.c	Wed Jul 25 17:04:43 2012 +0300
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/acpi_support/acpi_ibm.c 237493 2012-06-23 18:43:54Z iwasaki $");
 
 /*
  * Driver for extra ACPI-controlled gadgets found on IBM ThinkPad laptops.
@@ -50,6 +50,8 @@
 #include <sys/module.h>
 #include <dev/acpica/acpivar.h>
 #include <dev/led/led.h>
+#include <sys/power.h>
+#include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <isa/rtc.h>
 
@@ -70,6 +72,7 @@
 #define ACPI_IBM_METHOD_FANLEVEL	11
 #define ACPI_IBM_METHOD_FANSTATUS	12
 #define ACPI_IBM_METHOD_THERMAL		13
+#define ACPI_IBM_METHOD_HANDLEREVENTS	14
 
 /* Hotkeys/Buttons */
 #define IBM_RTC_HOTKEY1			0x64
@@ -126,6 +129,21 @@
 #define IBM_NAME_EVENTS_GET		"MHKP"
 #define IBM_NAME_EVENTS_AVAILMASK	"MHKA"
 
+/* Event Code */
+#define IBM_EVENT_LCD_BACKLIGHT		0x03
+#define IBM_EVENT_SUSPEND_TO_RAM	0x04
+#define IBM_EVENT_BLUETOOTH		0x05
+#define IBM_EVENT_SCREEN_EXPAND		0x07
+#define IBM_EVENT_SUSPEND_TO_DISK	0x0c
+#define IBM_EVENT_BRIGHTNESS_UP		0x10
+#define IBM_EVENT_BRIGHTNESS_DOWN	0x11
+#define IBM_EVENT_THINKLIGHT		0x12
+#define IBM_EVENT_ZOOM			0x14
+#define IBM_EVENT_VOLUME_UP		0x15
+#define IBM_EVENT_VOLUME_DOWN		0x16
+#define IBM_EVENT_MUTE			0x17
+#define IBM_EVENT_ACCESS_IBM_BUTTON	0x18
+
 #define ABS(x) (((x) < 0)? -(x) : (x))
 
 struct acpi_ibm_softc {
@@ -164,6 +182,8 @@
 	int		events_mask_supported;
 	int		events_enable;
 
+	unsigned int	handler_events;
+
 	struct sysctl_ctx_list	*sysctl_ctx;
 	struct sysctl_oid	*sysctl_tree;
 };
@@ -267,8 +287,15 @@
 
 static int	acpi_ibm_eventmask_set(struct acpi_ibm_softc *sc, int val);
 static int	acpi_ibm_thermal_sysctl(SYSCTL_HANDLER_ARGS);
+static int	acpi_ibm_handlerevents_sysctl(SYSCTL_HANDLER_ARGS);
 static void	acpi_ibm_notify(ACPI_HANDLE h, UINT32 notify, void *context);
 
+static int	acpi_ibm_brightness_set(struct acpi_ibm_softc *sc, int arg);
+static int	acpi_ibm_bluetooth_set(struct acpi_ibm_softc *sc, int arg);
+static int	acpi_ibm_thinklight_set(struct acpi_ibm_softc *sc, int arg);
+static int	acpi_ibm_volume_set(struct acpi_ibm_softc *sc, int arg);
+static int	acpi_ibm_mute_set(struct acpi_ibm_softc *sc, int arg);
+
 static device_method_t acpi_ibm_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, acpi_ibm_probe),
@@ -404,6 +431,15 @@
 		    "Thermal zones");
 	}
 
+	/* Hook up handlerevents node */
+	if (acpi_ibm_sysctl_init(sc, ACPI_IBM_METHOD_HANDLEREVENTS)) {
+		SYSCTL_ADD_PROC(sc->sysctl_ctx,
+		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO,
+		    "handlerevents", CTLTYPE_STRING | CTLFLAG_RW,
+		    sc, 0, acpi_ibm_handlerevents_sysctl, "I",
+		    "devd(8) events handled by acpi_ibm");
+	}
+
 	/* Handle notifies */
 	AcpiInstallNotifyHandler(sc->handle, ACPI_DEVICE_NOTIFY,
 	    acpi_ibm_notify, dev);
@@ -656,10 +692,8 @@
 static int
 acpi_ibm_sysctl_set(struct acpi_ibm_softc *sc, int method, int arg)
 {
-	int			val, step;
+	int			val;
 	UINT64			val_ec;
-	ACPI_OBJECT		Arg;
-	ACPI_OBJECT_LIST	Args;
 	ACPI_STATUS		status;
 
 	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
@@ -683,101 +717,23 @@
 		break;
 
 	case ACPI_IBM_METHOD_BRIGHTNESS:
-		if (arg < 0 || arg > 7)
-			return (EINVAL);
-
-		if (sc->cmos_handle) {
-			/* Read the current brightness */
-			status = ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS, &val_ec, 1);
-			if (ACPI_FAILURE(status))
-				return (status);
-			val = val_ec & IBM_EC_MASK_BRI;
-
-			Args.Count = 1;
-			Args.Pointer = &Arg;
-			Arg.Type = ACPI_TYPE_INTEGER;
-			Arg.Integer.Value = (arg > val) ? IBM_CMOS_BRIGHTNESS_UP : IBM_CMOS_BRIGHTNESS_DOWN;
-
-			step = (arg > val) ? 1 : -1;
-			for (int i = val; i != arg; i += step) {
-				status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL);
-				if (ACPI_FAILURE(status))
-					break;
-			}
-		}
-		return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_BRIGHTNESS, arg, 1);
+		return acpi_ibm_brightness_set(sc, arg);
 		break;
 
 	case ACPI_IBM_METHOD_VOLUME:
-		if (arg < 0 || arg > 14)
-			return (EINVAL);
-
-		status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
-		if (ACPI_FAILURE(status))
-			return (status);
-
-		if (sc->cmos_handle) {
-			val = val_ec & IBM_EC_MASK_VOL;
-
-			Args.Count = 1;
-			Args.Pointer = &Arg;
-			Arg.Type = ACPI_TYPE_INTEGER;
-			Arg.Integer.Value = (arg > val) ? IBM_CMOS_VOLUME_UP : IBM_CMOS_VOLUME_DOWN;
-
-			step = (arg > val) ? 1 : -1;
-			for (int i = val; i != arg; i += step) {
-				status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL);
-				if (ACPI_FAILURE(status))
-					break;
-			}
-		}
-		return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, arg + (val_ec & (~IBM_EC_MASK_VOL)), 1);
+		return acpi_ibm_volume_set(sc, arg);
 		break;
 
 	case ACPI_IBM_METHOD_MUTE:
-		if (arg < 0 || arg > 1)
-			return (EINVAL);
-
-		status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
-		if (ACPI_FAILURE(status))
-			return (status);
-
-		if (sc->cmos_handle) {
-			Args.Count = 1;
-			Args.Pointer = &Arg;
-			Arg.Type = ACPI_TYPE_INTEGER;
-			Arg.Integer.Value = IBM_CMOS_VOLUME_MUTE;
-
-			status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL);
-			if (ACPI_FAILURE(status))
-				break;
-		}
-		return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, (arg==1) ? val_ec | IBM_EC_MASK_MUTE : val_ec & (~IBM_EC_MASK_MUTE), 1);
+		return acpi_ibm_mute_set(sc, arg);
 		break;
 
 	case ACPI_IBM_METHOD_THINKLIGHT:
-		if (arg < 0 || arg > 1)
-			return (EINVAL);
-
-		if (sc->light_set_supported) {
-			Args.Count = 1;
-			Args.Pointer = &Arg;
-			Arg.Type = ACPI_TYPE_INTEGER;
-			Arg.Integer.Value = arg ? sc->light_cmd_on : sc->light_cmd_off;
-
-			status = AcpiEvaluateObject(sc->light_handle, NULL, &Args, NULL);
-			if (ACPI_SUCCESS(status))
-				sc->light_val = arg;
-			return (status);
-		}
+		return acpi_ibm_thinklight_set(sc, arg);
 		break;
 
 	case ACPI_IBM_METHOD_BLUETOOTH:
-		if (arg < 0 || arg > 1)
-			return (EINVAL);
-
-		val = (arg == 1) ? sc->wlan_bt_flags | IBM_NAME_MASK_BT : sc->wlan_bt_flags & (~IBM_NAME_MASK_BT);
-		return acpi_SetInteger(sc->handle, IBM_NAME_WLAN_BT_SET, val);
+		return acpi_ibm_bluetooth_set(sc, arg);
 		break;
 
 	case ACPI_IBM_METHOD_FANLEVEL:
@@ -898,6 +854,9 @@
 			return (TRUE);
 		}
 		return (FALSE);
+
+	case ACPI_IBM_METHOD_HANDLEREVENTS:
+		return (TRUE);
 	}
 	return (FALSE);
 }
@@ -937,6 +896,328 @@
 	return (error);
 }
 
+static int
+acpi_ibm_handlerevents_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct acpi_ibm_softc	*sc;
+	int			error = 0;
+	struct sbuf		sb;
+	char			*cp, *ep;
+	int			l, val;
+	unsigned int		handler_events;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+
+	sc = (struct acpi_ibm_softc *)oidp->oid_arg1;
+
+	if (sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND) == NULL)
+		return (ENOMEM);
+
+	ACPI_SERIAL_BEGIN(ibm);
+
+	/* Get old values if this is a get request. */
+	if (req->newptr == NULL) {
+		for (int i = 0; i < 8 * sizeof(sc->handler_events); i++)
+			if (sc->handler_events & (1 << i))
+				sbuf_printf(&sb, "0x%02x ", i + 1);
+		if (sbuf_len(&sb) == 0)
+			sbuf_printf(&sb, "NONE");
+	}
+
+	sbuf_trim(&sb);
+	sbuf_finish(&sb);
+
+	/* Copy out the old values to the user. */
+	error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
+	sbuf_delete(&sb);
+
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	/* If the user is setting a string, parse it. */
+	handler_events = 0;
+	cp = (char *)req->newptr;
+	while (*cp) {
+		if (isspace(*cp)) {
+			cp++;
+			continue;
+		}
+
+		ep = cp;
+
+		while (*ep && !isspace(*ep))
+			ep++;
+
+		l = ep - cp;
+		if (l == 0)
+			break;
+
+		if (strncmp(cp, "NONE", 4) == 0) {
+			cp = ep;
+			continue;
+		}
+
+		if (l >= 3 && cp[0] == '0' && (cp[1] == 'X' || cp[1] == 'x'))
+			val = strtoul(cp, &ep, 16);
+		else
+			val = strtoul(cp, &ep, 10);
+
+		if (val == 0 || ep == cp || val >= 8 * sizeof(handler_events)) {
+			cp[l] = '\0';
+			device_printf(sc->dev, "invalid event code: %s\n", cp);
+			error = EINVAL;
+			goto out;
+		}
+
+		handler_events |= 1 << (val - 1);
+
+		cp = ep;
+	}
+
+	sc->handler_events = handler_events;
+out:
+	ACPI_SERIAL_END(ibm);
+	return (error);
+}
+
+static int
+acpi_ibm_brightness_set(struct acpi_ibm_softc *sc, int arg)
+{
+	int			val, step;
+	UINT64			val_ec;
+	ACPI_OBJECT		Arg;
+	ACPI_OBJECT_LIST	Args;
+	ACPI_STATUS		status;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(ibm);
+
+	if (arg < 0 || arg > 7)
+		return (EINVAL);
+
+	/* Read the current brightness */
+	status = ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS, &val_ec, 1);
+	if (ACPI_FAILURE(status))
+		return (status);
+
+	if (sc->cmos_handle) {
+		val = val_ec & IBM_EC_MASK_BRI;
+
+		Args.Count = 1;
+		Args.Pointer = &Arg;
+		Arg.Type = ACPI_TYPE_INTEGER;
+		Arg.Integer.Value = (arg > val) ? IBM_CMOS_BRIGHTNESS_UP :
+						  IBM_CMOS_BRIGHTNESS_DOWN;
+
+		step = (arg > val) ? 1 : -1;
+		for (int i = val; i != arg; i += step) {
+			status = AcpiEvaluateObject(sc->cmos_handle, NULL,
+						    &Args, NULL);
+			if (ACPI_FAILURE(status)) {
+				/* Record the last value */
+				if (i != val) {
+					ACPI_EC_WRITE(sc->ec_dev,
+					    IBM_EC_BRIGHTNESS, i - step, 1);
+				}
+				return (status);
+			}
+		}
+	}
+
+	return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_BRIGHTNESS, arg, 1);
+}
+
+static int
+acpi_ibm_bluetooth_set(struct acpi_ibm_softc *sc, int arg)
+{
+	int			val;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(ibm);
+
+	if (arg < 0 || arg > 1)
+		return (EINVAL);
+
+	val = (arg == 1) ? sc->wlan_bt_flags | IBM_NAME_MASK_BT :
+			   sc->wlan_bt_flags & (~IBM_NAME_MASK_BT);
+	return acpi_SetInteger(sc->handle, IBM_NAME_WLAN_BT_SET, val);
+}
+
+static int
+acpi_ibm_thinklight_set(struct acpi_ibm_softc *sc, int arg)
+{
+	ACPI_OBJECT		Arg;
+	ACPI_OBJECT_LIST	Args;
+	ACPI_STATUS		status;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(ibm);
+
+	if (arg < 0 || arg > 1)
+		return (EINVAL);
+
+	if (sc->light_set_supported) {
+		Args.Count = 1;
+		Args.Pointer = &Arg;
+		Arg.Type = ACPI_TYPE_INTEGER;
+		Arg.Integer.Value = arg ? sc->light_cmd_on : sc->light_cmd_off;
+
+		status = AcpiEvaluateObject(sc->light_handle, NULL,
+					    &Args, NULL);
+		if (ACPI_SUCCESS(status))
+			sc->light_val = arg;
+		return (status);
+	}
+
+	return (0);
+}
+
+static int
+acpi_ibm_volume_set(struct acpi_ibm_softc *sc, int arg)
+{
+	int			val, step;
+	UINT64			val_ec;
+	ACPI_OBJECT		Arg;
+	ACPI_OBJECT_LIST	Args;
+	ACPI_STATUS		status;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(ibm);
+
+	if (arg < 0 || arg > 14)
+		return (EINVAL);
+
+	/* Read the current volume */
+	status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
+	if (ACPI_FAILURE(status))
+		return (status);
+
+	if (sc->cmos_handle) {
+		val = val_ec & IBM_EC_MASK_VOL;
+
+		Args.Count = 1;
+		Args.Pointer = &Arg;
+		Arg.Type = ACPI_TYPE_INTEGER;
+		Arg.Integer.Value = (arg > val) ? IBM_CMOS_VOLUME_UP :
+						  IBM_CMOS_VOLUME_DOWN;
+
+		step = (arg > val) ? 1 : -1;
+		for (int i = val; i != arg; i += step) {
+			status = AcpiEvaluateObject(sc->cmos_handle, NULL,
+						    &Args, NULL);
+			if (ACPI_FAILURE(status)) {
+				/* Record the last value */
+				if (i != val) {
+					val_ec = i - step +
+						 (val_ec & (~IBM_EC_MASK_VOL));
+					ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME,
+						      val_ec, 1);
+				}
+				return (status);
+			}
+		}
+	}
+
+	val_ec = arg + (val_ec & (~IBM_EC_MASK_VOL));
+	return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, val_ec, 1);
+}
+
+static int
+acpi_ibm_mute_set(struct acpi_ibm_softc *sc, int arg)
+{
+	UINT64			val_ec;
+	ACPI_OBJECT		Arg;
+	ACPI_OBJECT_LIST	Args;
+	ACPI_STATUS		status;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+	ACPI_SERIAL_ASSERT(ibm);
+
+	if (arg < 0 || arg > 1)
+		return (EINVAL);
+
+	status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
+	if (ACPI_FAILURE(status))
+		return (status);
+
+	if (sc->cmos_handle) {
+		Args.Count = 1;
+		Args.Pointer = &Arg;
+		Arg.Type = ACPI_TYPE_INTEGER;
+		Arg.Integer.Value = IBM_CMOS_VOLUME_MUTE;
+
+		status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL);
+		if (ACPI_FAILURE(status))
+			return (status);
+	}
+
+	val_ec = (arg == 1) ? val_ec | IBM_EC_MASK_MUTE :
+			      val_ec & (~IBM_EC_MASK_MUTE);
+	return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, val_ec, 1);
+}
+
+static void
+acpi_ibm_eventhandler(struct acpi_ibm_softc *sc, int arg)
+{
+	int			val;
+	UINT64			val_ec;
+	ACPI_STATUS		status;
+
+	ACPI_SERIAL_BEGIN(ibm);
+	switch (arg) {
+	case IBM_EVENT_SUSPEND_TO_RAM:
+		power_pm_suspend(POWER_SLEEP_STATE_SUSPEND);
+		break;
+
+	case IBM_EVENT_BLUETOOTH:
+		acpi_ibm_bluetooth_set(sc, (sc->wlan_bt_flags == 0));
+		break;
+
+	case IBM_EVENT_BRIGHTNESS_UP:
+	case IBM_EVENT_BRIGHTNESS_DOWN:
+		/* Read the current brightness */
+		status = ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS,
+				      &val_ec, 1);
+		if (ACPI_FAILURE(status))
+			return;
+
+		val = val_ec & IBM_EC_MASK_BRI;
+		val = (arg == IBM_EVENT_BRIGHTNESS_UP) ? val + 1 : val - 1;
+		acpi_ibm_brightness_set(sc, val);
+		break;
+
+	case IBM_EVENT_THINKLIGHT:
+		acpi_ibm_thinklight_set(sc, (sc->light_val == 0));
+		break;
+
+	case IBM_EVENT_VOLUME_UP:
+	case IBM_EVENT_VOLUME_DOWN:
+		/* Read the current volume */
+		status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
+		if (ACPI_FAILURE(status))
+			return;
+
+		val = val_ec & IBM_EC_MASK_VOL;
+		val = (arg == IBM_EVENT_VOLUME_UP) ? val + 1 : val - 1;
+		acpi_ibm_volume_set(sc, val);
+		break;
+
+	case IBM_EVENT_MUTE:
+		/* Read the current value */
+		status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1);
+		if (ACPI_FAILURE(status))
+			return;
+
+		val = ((val_ec & IBM_EC_MASK_MUTE) == IBM_EC_MASK_MUTE);
+		acpi_ibm_mute_set(sc, (val == 0));
+		break;
+
+	default:
+		break;
+	}
+	ACPI_SERIAL_END(ibm);
+}
+
 static void
 acpi_ibm_notify(ACPI_HANDLE h, UINT32 notify, void *context)
 {
@@ -965,6 +1246,10 @@
 				break;
 			}
 
+			/* Execute event handler */
+			if (sc->handler_events & (1 << (arg - 1)))
+				acpi_ibm_eventhandler(sc, (arg & 0xff));
+
 			/* Notify devd(8) */
 			acpi_UserNotify("IBM", h, (arg & 0xff));
 			break;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/Osd/OsdSchedule.c
--- a/head/sys/dev/acpica/Osd/OsdSchedule.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/Osd/OsdSchedule.c	Wed Jul 25 17:04:43 2012 +0300
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/Osd/OsdSchedule.c 232132 2012-02-24 23:15:21Z jkim $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/Osd/OsdSchedule.c 235945 2012-05-24 23:12:30Z jkim $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -215,6 +215,20 @@
 }
 
 void
+AcpiOsWaitEventsComplete(void)
+{
+	int i;
+
+	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
+
+	for (i = 0; i < acpi_max_tasks; i++)
+		if ((atomic_load_acq_int(&acpi_tasks[i].at_flag) &
+		    ACPI_TASK_ENQUEUED) != 0)
+			taskqueue_drain(acpi_taskq, &acpi_tasks[i].at_task);
+	return_VOID;
+}
+
+void
 AcpiOsSleep(UINT64 Milliseconds)
 {
     int		timo;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/Osd/OsdSynch.c
--- a/head/sys/dev/acpica/Osd/OsdSynch.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/Osd/OsdSynch.c	Wed Jul 25 17:04:43 2012 +0300
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/Osd/OsdSynch.c 231474 2012-02-10 23:30:29Z jkim $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/Osd/OsdSynch.c 236424 2012-06-01 21:33:33Z jkim $");
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
@@ -208,7 +208,7 @@
 			tmo -= slptick;
 		}
 	}
-	if (status == AE_OK)
+	if (ACPI_SUCCESS(status))
 		as->as_units -= Units;
 
 	mtx_unlock(&as->as_lock);
@@ -402,7 +402,7 @@
 			tmo -= slptick;
 		}
 	}
-	if (status == AE_OK)
+	if (ACPI_SUCCESS(status))
 		am->am_owner = curthread;
 
 	mtx_unlock(&am->am_lock);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi.c
--- a/head/sys/dev/acpica/acpi.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi.c 233579 2012-03-27 23:26:58Z jkim $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi.c 236408 2012-06-01 17:00:33Z jkim $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -2449,15 +2449,29 @@
 
 #if defined(__amd64__) || defined(__i386__)
 static void
+acpi_sleep_force_task(void *context)
+{
+    struct acpi_softc *sc = (struct acpi_softc *)context;
+
+    if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
+	device_printf(sc->acpi_dev, "force sleep state S%d failed\n",
+	    sc->acpi_next_sstate);
+}
+
+static void
 acpi_sleep_force(void *arg)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
 
     device_printf(sc->acpi_dev,
 	"suspend request timed out, forcing sleep now\n");
-    if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
-	device_printf(sc->acpi_dev, "force sleep state S%d failed\n",
-	    sc->acpi_next_sstate);
+    /*
+     * XXX Suspending from a callout causes a freeze in DEVICE_SUSPEND().
+     * Suspend from the acpi_task thread instead.
+     */
+    if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
+	acpi_sleep_force_task, sc)))
+	device_printf(sc->acpi_dev, "AcpiOsExecute() for sleeping failed\n");
 }
 #endif
 
@@ -2479,14 +2493,20 @@
     if (!acpi_sleep_states[state])
 	return (EOPNOTSUPP);
 
-    ACPI_LOCK(acpi);
-
     /* If a suspend request is already in progress, just return. */
     if (sc->acpi_next_sstate != 0) {
-    	ACPI_UNLOCK(acpi);
 	return (0);
     }
 
+    /* Wait until sleep is enabled. */
+    while (sc->acpi_sleep_disabled) {
+	AcpiOsSleep(1000);
+    }
+
+    ACPI_LOCK(acpi);
+
+    sc->acpi_next_sstate = state;
+
     /* S5 (soft-off) should be entered directly with no waiting. */
     if (state == ACPI_STATE_S5) {
     	ACPI_UNLOCK(acpi);
@@ -2495,7 +2515,6 @@
     }
 
     /* Record the pending state and notify all apm devices. */
-    sc->acpi_next_sstate = state;
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	clone->notify_status = APM_EV_NONE;
 	if ((clone->flags & ACPI_EVF_DEVD) == 0) {
@@ -2647,6 +2666,7 @@
     register_t intr;
     ACPI_STATUS status;
     enum acpi_sleep_state slp_state;
+    int sleep_result;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
@@ -2726,16 +2746,25 @@
     if (sc->acpi_sleep_delay > 0)
 	DELAY(sc->acpi_sleep_delay * 1000000);
 
+    intr = intr_disable();
     if (state != ACPI_STATE_S1) {
-	if (acpi_sleep_machdep(sc, state))
+	sleep_result = acpi_sleep_machdep(sc, state);
+	acpi_wakeup_machdep(sc, state, sleep_result, 0);
+	AcpiLeaveSleepStatePrep(state, acpi_sleep_flags);
+	intr_restore(intr);
+
+	/* call acpi_wakeup_machdep() again with interrupts enabled */
+	acpi_wakeup_machdep(sc, state, sleep_result, 1);
+
+	if (sleep_result == -1)
 		goto backout;
 
 	/* Re-enable ACPI hardware on wakeup from sleep state 4. */
 	if (state == ACPI_STATE_S4)
 	    AcpiEnable();
     } else {
-	intr = intr_disable();
 	status = AcpiEnterSleepState(state, acpi_sleep_flags);
+	AcpiLeaveSleepStatePrep(state, acpi_sleep_flags);
 	intr_restore(intr);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n",
@@ -2754,12 +2783,10 @@
 	acpi_wake_prep_walk(state);
 	sc->acpi_sstate = ACPI_STATE_S0;
     }
-    if (slp_state >= ACPI_SS_SLP_PREP) {
-	AcpiLeaveSleepStatePrep(state, acpi_sleep_flags);
-	AcpiLeaveSleepState(state);
-    }
     if (slp_state >= ACPI_SS_DEV_SUSPEND)
 	DEVICE_RESUME(root_bus);
+    if (slp_state >= ACPI_SS_SLP_PREP)
+	AcpiLeaveSleepState(state);
     if (slp_state >= ACPI_SS_SLEPT) {
 	acpi_resync_clock(sc);
 	acpi_enable_fixed_events(sc);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi_cpu.c
--- a/head/sys/dev/acpica/acpi_cpu.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi_cpu.c	Wed Jul 25 17:04:43 2012 +0300
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_cpu.c 227843 2011-11-22 21:28:20Z marius $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_cpu.c 238418 2012-07-13 08:11:55Z avg $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -89,6 +89,7 @@
     struct sysctl_ctx_list cpu_sysctl_ctx;
     struct sysctl_oid	*cpu_sysctl_tree;
     int			 cpu_cx_lowest;
+    int			 cpu_cx_lowest_lim;
     char 		 cpu_cx_supported[64];
     int			 cpu_rid;
 };
@@ -124,6 +125,13 @@
 #define PIIX4_STOP_BREAK_MASK	(PIIX4_BRLD_EN_IRQ0 | PIIX4_BRLD_EN_IRQ | PIIX4_BRLD_EN_IRQ8)
 #define PIIX4_PCNTRL_BST_EN	(1<<10)
 
+/* Allow users to ignore processor orders in MADT. */
+static int cpu_unordered;
+TUNABLE_INT("debug.acpi.cpu_unordered", &cpu_unordered);
+SYSCTL_INT(_debug_acpi, OID_AUTO, cpu_unordered, CTLFLAG_RDTUN,
+    &cpu_unordered, 0,
+    "Do not use the MADT to match ACPI Processor objects to CPUs.");
+
 /* Platform hardware resource information. */
 static uint32_t		 cpu_smi_cmd;	/* Value to write to SMI_CMD. */
 static uint8_t		 cpu_cst_cnt;	/* Indicate we are _CST aware. */
@@ -131,13 +139,12 @@
 
 /* Runtime state. */
 static int		 cpu_disable_idle; /* Disable entry to idle function */
-static int		 cpu_cx_count;	/* Number of valid Cx states */
 
 /* Values for sysctl. */
 static struct sysctl_ctx_list cpu_sysctl_ctx;
 static struct sysctl_oid *cpu_sysctl_tree;
 static int		 cpu_cx_generic;
-static int		 cpu_cx_lowest;
+static int		 cpu_cx_lowest_lim;
 
 static device_t		*cpu_devices;
 static int		 cpu_ndevices;
@@ -148,7 +155,7 @@
 static int	acpi_cpu_attach(device_t dev);
 static int	acpi_cpu_suspend(device_t dev);
 static int	acpi_cpu_resume(device_t dev);
-static int	acpi_pcpu_get_id(uint32_t idx, uint32_t *acpi_id,
+static int	acpi_pcpu_get_id(device_t dev, uint32_t *acpi_id,
 		    uint32_t *cpu_id);
 static struct resource_list *acpi_cpu_get_rlist(device_t dev, device_t child);
 static device_t	acpi_cpu_add_child(device_t dev, u_int order, const char *name,
@@ -166,7 +173,7 @@
 static void	acpi_cpu_notify(ACPI_HANDLE h, UINT32 notify, void *context);
 static int	acpi_cpu_quirks(void);
 static int	acpi_cpu_usage_sysctl(SYSCTL_HANDLER_ARGS);
-static int	acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc, int val);
+static int	acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc);
 static int	acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_cpu_global_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS);
 
@@ -245,7 +252,7 @@
      */
     acpi_id = obj->Processor.ProcId;
     AcpiOsFree(obj);
-    if (acpi_pcpu_get_id(device_get_unit(dev), &acpi_id, &cpu_id) != 0)
+    if (acpi_pcpu_get_id(dev, &acpi_id, &cpu_id) != 0)
 	return (ENXIO);
 
     /*
@@ -436,36 +443,66 @@
 }
 
 /*
- * Find the nth present CPU and return its pc_cpuid as well as set the
- * pc_acpi_id from the most reliable source.
+ * Find the processor associated with a given ACPI ID.  By default,
+ * use the MADT to map ACPI IDs to APIC IDs and use that to locate a
+ * processor.  Some systems have inconsistent ASL and MADT however.
+ * For these systems the cpu_unordered tunable can be set in which
+ * case we assume that Processor objects are listed in the same order
+ * in both the MADT and ASL.
  */
 static int
-acpi_pcpu_get_id(uint32_t idx, uint32_t *acpi_id, uint32_t *cpu_id)
+acpi_pcpu_get_id(device_t dev, uint32_t *acpi_id, uint32_t *cpu_id)
 {
-    struct pcpu	*pcpu_data;
-    uint32_t	 i;
+    struct pcpu	*pc;
+    uint32_t	 i, idx;
 
     KASSERT(acpi_id != NULL, ("Null acpi_id"));
     KASSERT(cpu_id != NULL, ("Null cpu_id"));
+    idx = device_get_unit(dev);
+
+    /*
+     * If pc_acpi_id for CPU 0 is not initialized (e.g. a non-APIC
+     * UP box) use the ACPI ID from the first processor we find.
+     */
+    if (idx == 0 && mp_ncpus == 1) {
+	pc = pcpu_find(0);
+	if (pc->pc_acpi_id == 0xffffffff)
+	    pc->pc_acpi_id = *acpi_id;
+	*cpu_id = 0;
+	return (0);
+    }
+
     CPU_FOREACH(i) {
-	pcpu_data = pcpu_find(i);
-	KASSERT(pcpu_data != NULL, ("no pcpu data for %d", i));
-	if (idx-- == 0) {
-	    /*
-	     * If pc_acpi_id was not initialized (e.g., a non-APIC UP box)
-	     * override it with the value from the ASL.  Otherwise, if the
-	     * two don't match, prefer the MADT-derived value.  Finally,
-	     * return the pc_cpuid to reference this processor.
-	     */
-	    if (pcpu_data->pc_acpi_id == 0xffffffff)
-		pcpu_data->pc_acpi_id = *acpi_id;
-	    else if (pcpu_data->pc_acpi_id != *acpi_id)
-		*acpi_id = pcpu_data->pc_acpi_id;
-	    *cpu_id = pcpu_data->pc_cpuid;
-	    return (0);
+	pc = pcpu_find(i);
+	KASSERT(pc != NULL, ("no pcpu data for %d", i));
+	if (cpu_unordered) {
+	    if (idx-- == 0) {
+		/*
+		 * If pc_acpi_id doesn't match the ACPI ID from the
+		 * ASL, prefer the MADT-derived value.
+		 */
+		if (pc->pc_acpi_id != *acpi_id)
+		    *acpi_id = pc->pc_acpi_id;
+		*cpu_id = pc->pc_cpuid;
+		return (0);
+	    }
+	} else {
+	    if (pc->pc_acpi_id == *acpi_id) {
+		if (bootverbose)
+		    device_printf(dev,
+			"Processor %s (ACPI ID %u) -> APIC ID %d\n",
+			acpi_name(acpi_get_handle(dev)), *acpi_id,
+			pc->pc_cpuid);
+		*cpu_id = pc->pc_cpuid;
+		return (0);
+	    }
 	}
     }
 
+    if (bootverbose)
+	printf("ACPI: Processor %s (ACPI ID %u) ignored\n",
+	    acpi_name(acpi_get_handle(dev)), *acpi_id);
+
     return (ESRCH);
 }
 
@@ -553,6 +590,7 @@
     /* Use initial sleep value of 1 sec. to start with lowest idle state. */
     sc->cpu_prev_sleep = 1000000;
     sc->cpu_cx_lowest = 0;
+    sc->cpu_cx_lowest_lim = 0;
 
     /*
      * Check for the ACPI 2.0 _CST sleep states object. If we can't find
@@ -592,6 +630,7 @@
     cx_ptr->type = ACPI_STATE_C1;
     cx_ptr->trans_lat = 0;
     cx_ptr++;
+    sc->cpu_non_c3 = sc->cpu_cx_count;
     sc->cpu_cx_count++;
 
     /* 
@@ -616,6 +655,7 @@
 	    cx_ptr->type = ACPI_STATE_C2;
 	    cx_ptr->trans_lat = AcpiGbl_FADT.C2Latency;
 	    cx_ptr++;
+	    sc->cpu_non_c3 = sc->cpu_cx_count;
 	    sc->cpu_cx_count++;
 	}
     }
@@ -633,6 +673,7 @@
 	    cx_ptr->trans_lat = AcpiGbl_FADT.C3Latency;
 	    cx_ptr++;
 	    sc->cpu_cx_count++;
+	    cpu_can_deep_sleep = 1;
 	}
     }
 }
@@ -709,13 +750,13 @@
 		/* This is the first C1 state.  Use the reserved slot. */
 		sc->cpu_cx_states[0] = *cx_ptr;
 	    } else {
-		sc->cpu_non_c3 = i;
+		sc->cpu_non_c3 = sc->cpu_cx_count;
 		cx_ptr++;
 		sc->cpu_cx_count++;
 	    }
 	    continue;
 	case ACPI_STATE_C2:
-	    sc->cpu_non_c3 = i;
+	    sc->cpu_non_c3 = sc->cpu_cx_count;
 	    break;
 	case ACPI_STATE_C3:
 	default:
@@ -724,7 +765,8 @@
 				 "acpi_cpu%d: C3[%d] not available.\n",
 				 device_get_unit(sc->cpu_dev), i));
 		continue;
-	    }
+	    } else
+		cpu_can_deep_sleep = 1;
 	    break;
 	}
 
@@ -779,7 +821,6 @@
      */
     acpi_cpu_quirks();
 
-    cpu_cx_count = 0;
     if (cpu_cx_generic) {
 	/*
 	 * We are using generic Cx mode, probe for available Cx states
@@ -788,24 +829,10 @@
 	for (i = 0; i < cpu_ndevices; i++) {
 	    sc = device_get_softc(cpu_devices[i]);
 	    acpi_cpu_generic_cx_probe(sc);
-	    if (sc->cpu_cx_count > cpu_cx_count)
-		    cpu_cx_count = sc->cpu_cx_count;
-	}
-
-	/*
-	 * Find the highest Cx state common to all CPUs
-	 * in the system, taking quirks into account.
-	 */
-	for (i = 0; i < cpu_ndevices; i++) {
-	    sc = device_get_softc(cpu_devices[i]);
-	    if (sc->cpu_cx_count < cpu_cx_count)
-		cpu_cx_count = sc->cpu_cx_count;
 	}
     } else {
 	/*
 	 * We are using _CST mode, remove C3 state if necessary.
-	 * Update the largest Cx state supported in the global cpu_cx_count.
-	 * It will be used in the global Cx sysctl handler.
 	 * As we now know for sure that we will be using _CST mode
 	 * install our notify handler.
 	 */
@@ -814,8 +841,6 @@
 	    if (cpu_quirks & CPU_QUIRK_NO_C3) {
 		sc->cpu_cx_count = sc->cpu_non_c3 + 1;
 	    }
-	    if (sc->cpu_cx_count > cpu_cx_count)
-		cpu_cx_count = sc->cpu_cx_count;
 	    AcpiInstallNotifyHandler(sc->cpu_handle, ACPI_DEVICE_NOTIFY,
 		acpi_cpu_notify, sc);
 	}
@@ -834,7 +859,7 @@
 	"Global lowest Cx sleep state to use");
 
     /* Take over idling from cpu_idle_default(). */
-    cpu_cx_lowest = 0;
+    cpu_cx_lowest_lim = 0;
     cpu_disable_idle = FALSE;
     cpu_idle_hook = acpi_cpu_idle;
 }
@@ -848,16 +873,10 @@
     /*
      * Set up the list of Cx states
      */
-    sc->cpu_non_c3 = 0;
     sbuf_new(&sb, sc->cpu_cx_supported, sizeof(sc->cpu_cx_supported),
 	SBUF_FIXEDLEN);
-    for (i = 0; i < sc->cpu_cx_count; i++) {
+    for (i = 0; i < sc->cpu_cx_count; i++)
 	sbuf_printf(&sb, "C%d/%d ", i + 1, sc->cpu_cx_states[i].trans_lat);
-	if (sc->cpu_cx_states[i].type < ACPI_STATE_C3)
-	    sc->cpu_non_c3 = i;
-	else
-	    cpu_can_deep_sleep = 1;
-    }
     sbuf_trim(&sb);
     sbuf_finish(&sb);
 }	
@@ -883,14 +902,12 @@
 		    (void *)sc, 0, acpi_cpu_usage_sysctl, "A",
 		    "percent usage for each Cx state");
 
-#ifdef notyet
     /* Signal platform that we can handle _CST notification. */
     if (!cpu_cx_generic && cpu_cst_cnt != 0) {
 	ACPI_LOCK(acpi);
 	AcpiOsWritePort(cpu_smi_cmd, cpu_cst_cnt, 8);
 	ACPI_UNLOCK(acpi);
     }
-#endif
 }
 
 /*
@@ -1025,8 +1042,6 @@
 acpi_cpu_notify(ACPI_HANDLE h, UINT32 notify, void *context)
 {
     struct acpi_cpu_softc *sc = (struct acpi_cpu_softc *)context;
-    struct acpi_cpu_softc *isc;
-    int i;
     
     if (notify != ACPI_NOTIFY_CX_STATES)
 	return;
@@ -1035,16 +1050,8 @@
     acpi_cpu_cx_cst(sc);
     acpi_cpu_cx_list(sc);
 
-    /* Update the new lowest useable Cx state for all CPUs. */
     ACPI_SERIAL_BEGIN(cpu);
-    cpu_cx_count = 0;
-    for (i = 0; i < cpu_ndevices; i++) {
-	isc = device_get_softc(cpu_devices[i]);
-	if (isc->cpu_cx_count > cpu_cx_count)
-	    cpu_cx_count = isc->cpu_cx_count;
-    }
-    if (sc->cpu_cx_lowest < cpu_cx_lowest)
-	acpi_cpu_set_cx_lowest(sc, min(cpu_cx_lowest, sc->cpu_cx_count - 1));
+    acpi_cpu_set_cx_lowest(sc);
     ACPI_SERIAL_END(cpu);
 }
 
@@ -1172,12 +1179,12 @@
 }
 
 static int
-acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc, int val)
+acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc)
 {
     int i;
 
     ACPI_SERIAL_ASSERT(cpu);
-    sc->cpu_cx_lowest = val;
+    sc->cpu_cx_lowest = min(sc->cpu_cx_lowest_lim, sc->cpu_cx_count - 1);
 
     /* If not disabling, cache the new lowest non-C3 state. */
     sc->cpu_non_c3 = 0;
@@ -1201,18 +1208,23 @@
     int		 val, error;
 
     sc = (struct acpi_cpu_softc *) arg1;
-    snprintf(state, sizeof(state), "C%d", sc->cpu_cx_lowest + 1);
+    snprintf(state, sizeof(state), "C%d", sc->cpu_cx_lowest_lim + 1);
     error = sysctl_handle_string(oidp, state, sizeof(state), req);
     if (error != 0 || req->newptr == NULL)
 	return (error);
     if (strlen(state) < 2 || toupper(state[0]) != 'C')
 	return (EINVAL);
-    val = (int) strtol(state + 1, NULL, 10) - 1;
-    if (val < 0 || val > sc->cpu_cx_count - 1)
-	return (EINVAL);
+    if (strcasecmp(state, "Cmax") == 0)
+	val = MAX_CX_STATES;
+    else {
+	val = (int) strtol(state + 1, NULL, 10);
+	if (val < 1 || val > MAX_CX_STATES)
+	    return (EINVAL);
+    }
 
     ACPI_SERIAL_BEGIN(cpu);
-    acpi_cpu_set_cx_lowest(sc, val);
+    sc->cpu_cx_lowest_lim = val - 1;
+    acpi_cpu_set_cx_lowest(sc);
     ACPI_SERIAL_END(cpu);
 
     return (0);
@@ -1225,22 +1237,27 @@
     char	state[8];
     int		val, error, i;
 
-    snprintf(state, sizeof(state), "C%d", cpu_cx_lowest + 1);
+    snprintf(state, sizeof(state), "C%d", cpu_cx_lowest_lim + 1);
     error = sysctl_handle_string(oidp, state, sizeof(state), req);
     if (error != 0 || req->newptr == NULL)
 	return (error);
     if (strlen(state) < 2 || toupper(state[0]) != 'C')
 	return (EINVAL);
-    val = (int) strtol(state + 1, NULL, 10) - 1;
-    if (val < 0 || val > cpu_cx_count - 1)
-	return (EINVAL);
-    cpu_cx_lowest = val;
+    if (strcasecmp(state, "Cmax") == 0)
+	val = MAX_CX_STATES;
+    else {
+	val = (int) strtol(state + 1, NULL, 10);
+	if (val < 1 || val > MAX_CX_STATES)
+	    return (EINVAL);
+    }
 
     /* Update the new lowest useable Cx state for all CPUs. */
     ACPI_SERIAL_BEGIN(cpu);
+    cpu_cx_lowest_lim = val - 1;
     for (i = 0; i < cpu_ndevices; i++) {
 	sc = device_get_softc(cpu_devices[i]);
-	acpi_cpu_set_cx_lowest(sc, min(val, sc->cpu_cx_count - 1));
+	sc->cpu_cx_lowest_lim = cpu_cx_lowest_lim;
+	acpi_cpu_set_cx_lowest(sc);
     }
     ACPI_SERIAL_END(cpu);
 
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi_ec.c
--- a/head/sys/dev/acpica/acpi_ec.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi_ec.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_ec.c 231161 2012-02-07 20:54:44Z jkim $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_ec.c 236424 2012-06-01 21:33:33Z jkim $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -178,7 +178,6 @@
 
 ACPI_SERIAL_DECL(ec, "ACPI embedded controller");
 
-SYSCTL_DECL(_debug_acpi);
 static SYSCTL_NODE(_debug_acpi, OID_AUTO, ec, CTLFLAG_RD, NULL, "EC debugging");
 
 static int	ec_burst_mode;
@@ -650,8 +649,8 @@
 	Status = EcCommand(sc, EC_COMMAND_QUERY);
 	if (ACPI_SUCCESS(Status))
 	    break;
-	if (EcCheckStatus(sc, "retr_check",
-	    EC_EVENT_INPUT_BUFFER_EMPTY) == AE_OK)
+	if (ACPI_SUCCESS(EcCheckStatus(sc, "retr_check",
+	    EC_EVENT_INPUT_BUFFER_EMPTY)))
 	    continue;
 	else
 	    break;
@@ -846,7 +845,7 @@
 	DELAY(10);
 	for (i = 0; i < count; i++) {
 	    Status = EcCheckStatus(sc, "poll", Event);
-	    if (Status == AE_OK)
+	    if (ACPI_SUCCESS(Status))
 		break;
 	    DELAY(EC_POLL_DELAY);
 	}
@@ -876,7 +875,7 @@
 	     * event we are actually waiting for.
 	     */
 	    Status = EcCheckStatus(sc, "sleep", Event);
-	    if (Status == AE_OK) {
+	    if (ACPI_SUCCESS(Status)) {
 		if (gen_count == sc->ec_gencount)
 		    no_intr++;
 		else
@@ -891,7 +890,7 @@
 	 * read the register once and trust whatever value we got.  This is
 	 * the best we can do at this point.
 	 */
-	if (Status != AE_OK)
+	if (ACPI_FAILURE(Status))
 	    Status = EcCheckStatus(sc, "sleep_end", Event);
     }
     if (!need_poll && no_intr > 10) {
@@ -899,7 +898,7 @@
 	    "not getting interrupts, switched to polled mode\n");
 	ec_polled_mode = 1;
     }
-    if (Status != AE_OK)
+    if (ACPI_FAILURE(Status))
 	    CTR0(KTR_ACPI, "error: ec wait timed out");
     return (Status);
 }
@@ -978,8 +977,8 @@
 	EC_SET_DATA(sc, Address);
 	status = EcWaitEvent(sc, EC_EVENT_OUTPUT_BUFFER_FULL, gen_count);
 	if (ACPI_FAILURE(status)) {
-	    if (EcCheckStatus(sc, "retr_check",
-		EC_EVENT_INPUT_BUFFER_EMPTY) == AE_OK)
+	    if (ACPI_SUCCESS(EcCheckStatus(sc, "retr_check",
+		EC_EVENT_INPUT_BUFFER_EMPTY)))
 		continue;
 	    else
 		break;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi_powerres.c
--- a/head/sys/dev/acpica/acpi_powerres.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi_powerres.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_powerres.c 227293 2011-11-07 06:44:47Z ed $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_powerres.c 238199 2012-07-07 17:13:09Z eadler $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -314,7 +314,6 @@
     ACPI_OBJECT			*reslist_object;
     ACPI_STATUS			status;
     char			*method_name, *reslist_name;
-    int				res_changed;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
@@ -445,7 +444,7 @@
      * Now we are ready to switch, so kill off any current power
      * resource references.
      */
-    res_changed = acpi_pwr_dereference_resource(pc);
+    acpi_pwr_dereference_resource(pc);
 
     /*
      * Add new power resource references, if we have any.  Traverse the
@@ -457,7 +456,6 @@
 			  reslist_object->Package.Count));
 	acpi_ForeachPackageObject(reslist_object, acpi_pwr_reference_resource,
 				  pc);
-	res_changed = 1;
     }
 
     /*
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi_smbat.c
--- a/head/sys/dev/acpica/acpi_smbat.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi_smbat.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_smbat.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_smbat.c 235556 2012-05-17 17:58:53Z jhb $");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
@@ -62,7 +62,6 @@
 
 ACPI_SERIAL_DECL(smbat, "ACPI Smart Battery");
 
-SYSCTL_DECL(_debug_acpi);
 static SYSCTL_NODE(_debug_acpi, OID_AUTO, batt, CTLFLAG_RD, NULL,
     "Battery debugging");
 
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpi_video.c
--- a/head/sys/dev/acpica/acpi_video.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpi_video.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_video.c 227293 2011-11-07 06:44:47Z ed $");
+__FBSDID("$FreeBSD: head/sys/dev/acpica/acpi_video.c 237197 2012-06-17 16:19:09Z iwasaki $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -75,6 +75,7 @@
 static int	acpi_video_probe(device_t);
 static int	acpi_video_attach(device_t);
 static int	acpi_video_detach(device_t);
+static int	acpi_video_resume(device_t);
 static int	acpi_video_shutdown(device_t);
 static void	acpi_video_notify_handler(ACPI_HANDLE, UINT32, void *);
 static void	acpi_video_power_profile(void *);
@@ -155,6 +156,7 @@
 	DEVMETHOD(device_probe, acpi_video_probe),
 	DEVMETHOD(device_attach, acpi_video_attach),
 	DEVMETHOD(device_detach, acpi_video_detach),
+	DEVMETHOD(device_resume, acpi_video_resume),
 	DEVMETHOD(device_shutdown, acpi_video_shutdown),
 	{ 0, 0 }
 };
@@ -305,6 +307,36 @@
 }
 
 static int
+acpi_video_resume(device_t dev)
+{
+	struct acpi_video_softc *sc;
+	struct acpi_video_output *vo, *vn;
+	int level;
+
+	sc = device_get_softc(dev);
+
+	/* Restore brightness level */
+	ACPI_SERIAL_BEGIN(video);
+	ACPI_SERIAL_BEGIN(video_output);
+	STAILQ_FOREACH_SAFE(vo, &sc->vid_outputs, vo_next, vn) {
+		if ((vo->adr & DOD_DEVID_MASK_FULL) != DOD_DEVID_LCD &&
+		    (vo->adr & DOD_DEVID_MASK) != DOD_DEVID_INTDFP)
+			continue;
+
+		if ((vo_get_device_status(vo->handle) & DCS_ACTIVE) == 0)
+			continue;
+
+		level = vo_get_brightness(vo->handle);
+		if (level != -1)
+			vo_set_brightness(vo->handle, level);
+	}
+	ACPI_SERIAL_END(video_output);
+	ACPI_SERIAL_END(video);
+
+	return (0);
+}
+
+static int
 acpi_video_shutdown(device_t dev)
 {
 	struct acpi_video_softc *sc;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/acpica/acpivar.h
--- a/head/sys/dev/acpica/acpivar.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/acpica/acpivar.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/acpica/acpivar.h 233250 2012-03-20 21:37:52Z jkim $
+ * $FreeBSD: head/sys/dev/acpica/acpivar.h 236772 2012-06-09 00:37:26Z iwasaki $
  */
 
 #ifndef _ACPIVAR_H_
@@ -71,8 +71,6 @@
     int			acpi_verbose;
     int			acpi_handle_reboot;
 
-    bus_dma_tag_t	acpi_waketag;
-    bus_dmamap_t	acpi_wakemap;
     vm_offset_t		acpi_wakeaddr;
     vm_paddr_t		acpi_wakephys;
 
@@ -273,7 +271,7 @@
 
     if ((h = acpi_get_handle(dev)) == NULL)
 	return (ACPI_TYPE_NOT_FOUND);
-    if (AcpiGetType(h, &t) != AE_OK)
+    if (ACPI_FAILURE(AcpiGetType(h, &t)))
 	return (ACPI_TYPE_NOT_FOUND);
     return (t);
 }
@@ -439,6 +437,8 @@
 int		acpi_machdep_init(device_t dev);
 void		acpi_install_wakeup_handler(struct acpi_softc *sc);
 int		acpi_sleep_machdep(struct acpi_softc *sc, int state);
+int		acpi_wakeup_machdep(struct acpi_softc *sc, int state,
+		    int sleep_result, int intr_enabled);
 int		acpi_table_quirks(int *quirks);
 int		acpi_machdep_quirks(int *quirks);
 
@@ -492,5 +492,7 @@
 /* Use the device logging level for ktr(4). */
 #define	KTR_ACPI		KTR_DEV
 
+SYSCTL_DECL(_debug_acpi);
+
 #endif /* _KERNEL */
 #endif /* !_ACPIVAR_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/adb/adb_kbd.c
--- a/head/sys/dev/adb/adb_kbd.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/adb/adb_kbd.c	Wed Jul 25 17:04:43 2012 +0300
@@ -22,7 +22,7 @@
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/adb/adb_kbd.c 226449 2011-10-16 21:01:42Z nwhitehorn $
+ * $FreeBSD: head/sys/dev/adb/adb_kbd.c 237480 2012-06-23 13:52:44Z jhibbits $
  */
 
 #include <sys/cdefs.h>
@@ -426,8 +426,10 @@
 		/* 0x7f is always the power button */
 		if (data[0] == 0x7f && devctl_process_running()) {
 			devctl_notify("PMU", "Button", "pressed", NULL);
+			mtx_unlock(&sc->sc_mutex);
 			return (0);
 		} else if (data[0] == 0xff) {
+			mtx_unlock(&sc->sc_mutex);
 			return (0);	/* Ignore power button release. */
 		}
 		if ((data[0] & 0x7f) == 57 && sc->buffers < 7) {
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ae/if_ae.c
--- a/head/sys/dev/ae/if_ae.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ae/if_ae.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ae/if_ae.c 227452 2011-11-11 19:15:32Z yongari $");
+__FBSDID("$FreeBSD: head/sys/dev/ae/if_ae.c 236670 2012-06-06 09:07:50Z pluknet $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1381,12 +1381,13 @@
 	/*
 	 * Configure PME.
 	 */
-	pci_find_cap(sc->dev, PCIY_PMG, &pmc);
-	pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
-	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
-	if ((ifp->if_capenable & IFCAP_WOL) != 0)
-		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
-	pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
+	if (pci_find_cap(sc->dev, PCIY_PMG, &pmc) == 0) {
+		pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
+		pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
+		if ((ifp->if_capenable & IFCAP_WOL) != 0)
+			pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
+		pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
+	}
 }
 
 static int
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agp.c
--- a/head/sys/dev/agp/agp.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agp.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/agp/agp.c 238172 2012-07-06 15:57:03Z marcel $");
 
 #include "opt_agp.h"
 #include "opt_bus.h"
@@ -55,7 +55,6 @@
 #include <vm/vm_pageout.h>
 #include <vm/pmap.h>
 
-#include <machine/md_var.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/rman.h>
@@ -234,12 +233,13 @@
 	 * Work out an upper bound for agp memory allocation. This
 	 * uses a heurisitc table from the Linux driver.
 	 */
-	memsize = ptoa(Maxmem) >> 20;
+	memsize = ptoa(realmem) >> 20;
 	for (i = 0; i < agp_max_size; i++) {
 		if (memsize <= agp_max[i][0])
 			break;
 	}
-	if (i == agp_max_size) i = agp_max_size - 1;
+	if (i == agp_max_size)
+		i = agp_max_size - 1;
 	sc->as_maxmem = agp_max[i][1] << 20U;
 
 	/*
@@ -803,6 +803,13 @@
 }
 
 static int
+agp_chipset_flush(device_t dev)
+{
+
+	return (AGP_CHIPSET_FLUSH(dev));
+}
+
+static int
 agp_open(struct cdev *kdev, int oflags, int devtype, struct thread *td)
 {
 	device_t dev = kdev->si_drv1;
@@ -869,6 +876,8 @@
 	case AGPIOC_UNBIND:
 		return agp_unbind_user(dev, (agp_unbind *)data);
 
+	case AGPIOC_CHIPSET_FLUSH:
+		return agp_chipset_flush(dev);
 	}
 
 	return EINVAL;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agp_i810.c
--- a/head/sys/dev/agp/agp_i810.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agp_i810.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,8 +1,12 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * Copyright (c) 2000 Ruslan Ermilov
+ * Copyright (c) 2011 The FreeBSD Foundation
  * All rights reserved.
  *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -28,17 +32,27 @@
 /*
  * Fixes for 830/845G support: David Dawes <dawes at xfree86.org>
  * 852GM/855GM/865G support added by David Dawes <dawes at xfree86.org>
+ *
+ * This is generic Intel GTT handling code, morphed from the AGP
+ * bridge code.
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/agp/agp_i810.c 238301 2012-07-09 16:23:59Z marcel $");
 
 #include "opt_bus.h"
 
+#if 0
+#define	KTR_AGP_I810	KTR_DEV
+#else
+#define	KTR_AGP_I810	0
+#endif
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
@@ -47,8 +61,10 @@
 
 #include <dev/agp/agppriv.h>
 #include <dev/agp/agpreg.h>
+#include <dev/agp/agp_i810.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
+#include <dev/pci/pci_private.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
@@ -63,6 +79,88 @@
 
 MALLOC_DECLARE(M_AGP);
 
+struct agp_i810_match;
+
+static int agp_i810_check_active(device_t bridge_dev);
+static int agp_i830_check_active(device_t bridge_dev);
+static int agp_i915_check_active(device_t bridge_dev);
+static int agp_sb_check_active(device_t bridge_dev);
+
+static void agp_82852_set_desc(device_t dev,
+    const struct agp_i810_match *match);
+static void agp_i810_set_desc(device_t dev, const struct agp_i810_match *match);
+
+static void agp_i810_dump_regs(device_t dev);
+static void agp_i830_dump_regs(device_t dev);
+static void agp_i855_dump_regs(device_t dev);
+static void agp_i915_dump_regs(device_t dev);
+static void agp_i965_dump_regs(device_t dev);
+static void agp_sb_dump_regs(device_t dev);
+
+static int agp_i810_get_stolen_size(device_t dev);
+static int agp_i830_get_stolen_size(device_t dev);
+static int agp_i915_get_stolen_size(device_t dev);
+static int agp_sb_get_stolen_size(device_t dev);
+
+static int agp_i810_get_gtt_mappable_entries(device_t dev);
+static int agp_i830_get_gtt_mappable_entries(device_t dev);
+static int agp_i915_get_gtt_mappable_entries(device_t dev);
+
+static int agp_i810_get_gtt_total_entries(device_t dev);
+static int agp_i965_get_gtt_total_entries(device_t dev);
+static int agp_gen5_get_gtt_total_entries(device_t dev);
+static int agp_sb_get_gtt_total_entries(device_t dev);
+
+static int agp_i810_install_gatt(device_t dev);
+static int agp_i830_install_gatt(device_t dev);
+
+static void agp_i810_deinstall_gatt(device_t dev);
+static void agp_i830_deinstall_gatt(device_t dev);
+
+static void agp_i810_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+static void agp_i830_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+static void agp_i915_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+static void agp_i965_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+static void agp_g4x_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+static void agp_sb_install_gtt_pte(device_t dev, u_int index,
+    vm_offset_t physical, int flags);
+
+static void agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte);
+static void agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte);
+static void agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte);
+static void agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte);
+static void agp_sb_write_gtt(device_t dev, u_int index, uint32_t pte);
+
+static u_int32_t agp_i810_read_gtt_pte(device_t dev, u_int index);
+static u_int32_t agp_i915_read_gtt_pte(device_t dev, u_int index);
+static u_int32_t agp_i965_read_gtt_pte(device_t dev, u_int index);
+static u_int32_t agp_g4x_read_gtt_pte(device_t dev, u_int index);
+
+static vm_paddr_t agp_i810_read_gtt_pte_paddr(device_t dev, u_int index);
+static vm_paddr_t agp_i915_read_gtt_pte_paddr(device_t dev, u_int index);
+static vm_paddr_t agp_sb_read_gtt_pte_paddr(device_t dev, u_int index);
+
+static int agp_i810_set_aperture(device_t dev, u_int32_t aperture);
+static int agp_i830_set_aperture(device_t dev, u_int32_t aperture);
+static int agp_i915_set_aperture(device_t dev, u_int32_t aperture);
+
+static int agp_i810_chipset_flush_setup(device_t dev);
+static int agp_i915_chipset_flush_setup(device_t dev);
+static int agp_i965_chipset_flush_setup(device_t dev);
+
+static void agp_i810_chipset_flush_teardown(device_t dev);
+static void agp_i915_chipset_flush_teardown(device_t dev);
+static void agp_i965_chipset_flush_teardown(device_t dev);
+
+static void agp_i810_chipset_flush(device_t dev);
+static void agp_i830_chipset_flush(device_t dev);
+static void agp_i915_chipset_flush(device_t dev);
+
 enum {
 	CHIP_I810,	/* i810/i815 */
 	CHIP_I830,	/* 830M/845G */
@@ -72,6 +170,7 @@
 	CHIP_G33,	/* G33/Q33/Q35 */
 	CHIP_IGD,	/* Pineview */
 	CHIP_G4X,	/* G45/Q45 */
+	CHIP_SB,	/* SandyBridge */
 };
 
 /* The i810 through i855 have the registers at BAR 1, and the GATT gets
@@ -96,19 +195,308 @@
 	{ -1, 0 }
 };
 
+static struct resource_spec agp_g4x_res_spec[] = {
+	{ SYS_RES_MEMORY, AGP_G4X_MMADR, RF_ACTIVE | RF_SHAREABLE },
+	{ SYS_RES_MEMORY, AGP_G4X_GTTADR, RF_ACTIVE | RF_SHAREABLE },
+	{ -1, 0 }
+};
+
 struct agp_i810_softc {
 	struct agp_softc agp;
 	u_int32_t initial_aperture;	/* aperture size at startup */
 	struct agp_gatt *gatt;
-	int chiptype;			/* i810-like or i830 */
 	u_int32_t dcache_size;		/* i810 only */
-	u_int32_t stolen;		/* number of i830/845 gtt entries for stolen memory */
+	u_int32_t stolen;		/* number of i830/845 gtt
+					   entries for stolen memory */
+	u_int stolen_size;		/* BIOS-reserved graphics memory */
+	u_int gtt_total_entries;	/* Total number of gtt ptes */
+	u_int gtt_mappable_entries;	/* Number of gtt ptes mappable by CPU */
 	device_t bdev;			/* bridge device */
+	void *argb_cursor;		/* contigmalloc area for ARGB cursor */
+	struct resource *sc_res[2];
+	const struct agp_i810_match *match;
+	int sc_flush_page_rid;
+	struct resource *sc_flush_page_res;
+	void *sc_flush_page_vaddr;
+	int sc_bios_allocated_flush_page;
+};
 
-	void *argb_cursor;		/* contigmalloc area for ARGB cursor */
+static device_t intel_agp;
 
-	struct resource_spec * sc_res_spec;
-	struct resource *sc_res[2];
+struct agp_i810_driver {
+	int chiptype;
+	int gen;
+	int busdma_addr_mask_sz;
+	struct resource_spec *res_spec;
+	int (*check_active)(device_t);
+	void (*set_desc)(device_t, const struct agp_i810_match *);
+	void (*dump_regs)(device_t);
+	int (*get_stolen_size)(device_t);
+	int (*get_gtt_total_entries)(device_t);
+	int (*get_gtt_mappable_entries)(device_t);
+	int (*install_gatt)(device_t);
+	void (*deinstall_gatt)(device_t);
+	void (*write_gtt)(device_t, u_int, uint32_t);
+	void (*install_gtt_pte)(device_t, u_int, vm_offset_t, int);
+	u_int32_t (*read_gtt_pte)(device_t, u_int);
+	vm_paddr_t (*read_gtt_pte_paddr)(device_t , u_int);
+	int (*set_aperture)(device_t, u_int32_t);
+	int (*chipset_flush_setup)(device_t);
+	void (*chipset_flush_teardown)(device_t);
+	void (*chipset_flush)(device_t);
+};
+
+static const struct agp_i810_driver agp_i810_i810_driver = {
+	.chiptype = CHIP_I810,
+	.gen = 1,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i810_res_spec,
+	.check_active = agp_i810_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i810_dump_regs,
+	.get_stolen_size = agp_i810_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i810_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i810_install_gatt,
+	.deinstall_gatt = agp_i810_deinstall_gatt,
+	.write_gtt = agp_i810_write_gtt,
+	.install_gtt_pte = agp_i810_install_gtt_pte,
+	.read_gtt_pte = agp_i810_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
+	.set_aperture = agp_i810_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i810_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_i815_driver = {
+	.chiptype = CHIP_I810,
+	.gen = 2,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i810_res_spec,
+	.check_active = agp_i810_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i810_dump_regs,
+	.get_stolen_size = agp_i810_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i810_install_gatt,
+	.deinstall_gatt = agp_i810_deinstall_gatt,
+	.write_gtt = agp_i810_write_gtt,
+	.install_gtt_pte = agp_i810_install_gtt_pte,
+	.read_gtt_pte = agp_i810_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
+	.set_aperture = agp_i810_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i830_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_i830_driver = {
+	.chiptype = CHIP_I830,
+	.gen = 2,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i810_res_spec,
+	.check_active = agp_i830_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i830_dump_regs,
+	.get_stolen_size = agp_i830_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i810_write_gtt,
+	.install_gtt_pte = agp_i830_install_gtt_pte,
+	.read_gtt_pte = agp_i810_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
+	.set_aperture = agp_i830_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i830_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_i855_driver = {
+	.chiptype = CHIP_I855,
+	.gen = 2,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i810_res_spec,
+	.check_active = agp_i830_check_active,
+	.set_desc = agp_82852_set_desc,
+	.dump_regs = agp_i855_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i810_write_gtt,
+	.install_gtt_pte = agp_i830_install_gtt_pte,
+	.read_gtt_pte = agp_i810_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
+	.set_aperture = agp_i830_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i830_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_i865_driver = {
+	.chiptype = CHIP_I855,
+	.gen = 2,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i810_res_spec,
+	.check_active = agp_i830_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i855_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i810_write_gtt,
+	.install_gtt_pte = agp_i830_install_gtt_pte,
+	.read_gtt_pte = agp_i810_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i830_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_i915_driver = {
+	.chiptype = CHIP_I915,
+	.gen = 3,
+	.busdma_addr_mask_sz = 32,
+	.res_spec = agp_i915_res_spec,
+	.check_active = agp_i915_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i915_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i915_write_gtt,
+	.install_gtt_pte = agp_i915_install_gtt_pte,
+	.read_gtt_pte = agp_i915_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i915_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i915_chipset_flush_teardown,
+	.chipset_flush = agp_i915_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_g965_driver = {
+	.chiptype = CHIP_I965,
+	.gen = 4,
+	.busdma_addr_mask_sz = 36,
+	.res_spec = agp_i965_res_spec,
+	.check_active = agp_i915_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i965_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i965_write_gtt,
+	.install_gtt_pte = agp_i965_install_gtt_pte,
+	.read_gtt_pte = agp_i965_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i965_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
+	.chipset_flush = agp_i915_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_g33_driver = {
+	.chiptype = CHIP_G33,
+	.gen = 3,
+	.busdma_addr_mask_sz = 36,
+	.res_spec = agp_i915_res_spec,
+	.check_active = agp_i915_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i965_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i915_write_gtt,
+	.install_gtt_pte = agp_i915_install_gtt_pte,
+	.read_gtt_pte = agp_i915_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i965_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
+	.chipset_flush = agp_i915_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_igd_driver = {
+	.chiptype = CHIP_IGD,
+	.gen = 3,
+	.busdma_addr_mask_sz = 36,
+	.res_spec = agp_i915_res_spec,
+	.check_active = agp_i915_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i915_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_i915_write_gtt,
+	.install_gtt_pte = agp_i915_install_gtt_pte,
+	.read_gtt_pte = agp_i915_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i965_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
+	.chipset_flush = agp_i915_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_g4x_driver = {
+	.chiptype = CHIP_G4X,
+	.gen = 5,
+	.busdma_addr_mask_sz = 36,
+	.res_spec = agp_i965_res_spec,
+	.check_active = agp_i915_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_i965_dump_regs,
+	.get_stolen_size = agp_i915_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_gen5_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_g4x_write_gtt,
+	.install_gtt_pte = agp_g4x_install_gtt_pte,
+	.read_gtt_pte = agp_g4x_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i965_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
+	.chipset_flush = agp_i915_chipset_flush,
+};
+
+static const struct agp_i810_driver agp_i810_sb_driver = {
+	.chiptype = CHIP_SB,
+	.gen = 6,
+	.busdma_addr_mask_sz = 40,
+	.res_spec = agp_g4x_res_spec,
+	.check_active = agp_sb_check_active,
+	.set_desc = agp_i810_set_desc,
+	.dump_regs = agp_sb_dump_regs,
+	.get_stolen_size = agp_sb_get_stolen_size,
+	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
+	.get_gtt_total_entries = agp_sb_get_gtt_total_entries,
+	.install_gatt = agp_i830_install_gatt,
+	.deinstall_gatt = agp_i830_deinstall_gatt,
+	.write_gtt = agp_sb_write_gtt,
+	.install_gtt_pte = agp_sb_install_gtt_pte,
+	.read_gtt_pte = agp_g4x_read_gtt_pte,
+	.read_gtt_pte_paddr = agp_sb_read_gtt_pte_paddr,
+	.set_aperture = agp_i915_set_aperture,
+	.chipset_flush_setup = agp_i810_chipset_flush_setup,
+	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
+	.chipset_flush = agp_i810_chipset_flush,
 };
 
 /* For adding new devices, devid is the id of the graphics controller
@@ -118,75 +506,232 @@
  */
 static const struct agp_i810_match {
 	int devid;
-	int chiptype;
-	int bridge_offset;
 	char *name;
+	const struct agp_i810_driver *driver;
 } agp_i810_matches[] = {
-	{0x71218086, CHIP_I810, 0x00010000,
-	    "Intel 82810 (i810 GMCH) SVGA controller"},
-	{0x71238086, CHIP_I810, 0x00010000,
-	    "Intel 82810-DC100 (i810-DC100 GMCH) SVGA controller"},
-	{0x71258086, CHIP_I810, 0x00010000,
-	    "Intel 82810E (i810E GMCH) SVGA controller"},
-	{0x11328086, CHIP_I810, 0x00020000,
-	    "Intel 82815 (i815 GMCH) SVGA controller"},
-	{0x35778086, CHIP_I830, 0x00020000,
-	    "Intel 82830M (830M GMCH) SVGA controller"},
-	{0x25628086, CHIP_I830, 0x00020000,
-	    "Intel 82845M (845M GMCH) SVGA controller"},
-	{0x35828086, CHIP_I855, 0x00020000,
-	    "Intel 82852/855GM SVGA controller"},
-	{0x25728086, CHIP_I855, 0x00020000,
-	    "Intel 82865G (865G GMCH) SVGA controller"},
-	{0x25828086, CHIP_I915, 0x00020000,
-	    "Intel 82915G (915G GMCH) SVGA controller"},
-	{0x258A8086, CHIP_I915, 0x00020000,
-	    "Intel E7221 SVGA controller"},
-	{0x25928086, CHIP_I915, 0x00020000,
-	    "Intel 82915GM (915GM GMCH) SVGA controller"},
-	{0x27728086, CHIP_I915, 0x00020000,
-	    "Intel 82945G (945G GMCH) SVGA controller"},
-	{0x27A28086, CHIP_I915, 0x00020000,
-	    "Intel 82945GM (945GM GMCH) SVGA controller"},
-	{0x27AE8086, CHIP_I915, 0x00020000,
-	    "Intel 945GME SVGA controller"},
-	{0x29728086, CHIP_I965, 0x00020000,
-	    "Intel 946GZ SVGA controller"},
-	{0x29828086, CHIP_I965, 0x00020000,
-	    "Intel G965 SVGA controller"},
-	{0x29928086, CHIP_I965, 0x00020000,
-	    "Intel Q965 SVGA controller"},
-	{0x29A28086, CHIP_I965, 0x00020000,
-	    "Intel G965 SVGA controller"},
-	{0x29B28086, CHIP_G33, 0x00020000,
-	    "Intel Q35 SVGA controller"},
-	{0x29C28086, CHIP_G33, 0x00020000,
-	    "Intel G33 SVGA controller"},
-	{0x29D28086, CHIP_G33, 0x00020000,
-	    "Intel Q33 SVGA controller"},
-	{0xA0018086, CHIP_IGD, 0x00010000,
-	    "Intel Pineview SVGA controller"},
-	{0xA0118086, CHIP_IGD, 0x00010000,
-	    "Intel Pineview (M) SVGA controller"},
-	{0x2A028086, CHIP_I965, 0x00020000,
-	    "Intel GM965 SVGA controller"},
-	{0x2A128086, CHIP_I965, 0x00020000,
-	    "Intel GME965 SVGA controller"},
-	{0x2A428086, CHIP_G4X, 0x00020000,
-	    "Intel GM45 SVGA controller"},
-	{0x2E028086, CHIP_G4X, 0x00020000,
-	    "Intel Eaglelake SVGA controller"},
-	{0x2E128086, CHIP_G4X, 0x00020000,
-	    "Intel Q45 SVGA controller"},
-	{0x2E228086, CHIP_G4X, 0x00020000,
-	    "Intel G45 SVGA controller"},
-	{0x2E328086, CHIP_G4X, 0x00020000,
-	    "Intel G41 SVGA controller"},
-	{0x00428086, CHIP_G4X, 0x00020000,
-	    "Intel Ironlake (D) SVGA controller"},
-	{0x00468086, CHIP_G4X, 0x00020000,
-	    "Intel Ironlake (M) SVGA controller"},
-	{0, 0, 0, NULL}
+	{
+		.devid = 0x71218086,
+		.name = "Intel 82810 (i810 GMCH) SVGA controller",
+		.driver = &agp_i810_i810_driver
+	},
+	{
+		.devid = 0x71238086,
+		.name = "Intel 82810-DC100 (i810-DC100 GMCH) SVGA controller",
+		.driver = &agp_i810_i810_driver
+	},
+	{
+		.devid = 0x71258086,
+		.name = "Intel 82810E (i810E GMCH) SVGA controller",
+		.driver = &agp_i810_i810_driver
+	},
+	{
+		.devid = 0x11328086,
+		.name = "Intel 82815 (i815 GMCH) SVGA controller",
+		.driver = &agp_i810_i815_driver
+	},
+	{
+		.devid = 0x35778086,
+		.name = "Intel 82830M (830M GMCH) SVGA controller",
+		.driver = &agp_i810_i830_driver
+	},
+	{
+		.devid = 0x25628086,
+		.name = "Intel 82845M (845M GMCH) SVGA controller",
+		.driver = &agp_i810_i830_driver
+	},
+	{
+		.devid = 0x35828086,
+		.name = "Intel 82852/855GM SVGA controller",
+		.driver = &agp_i810_i855_driver
+	},
+	{
+		.devid = 0x25728086,
+		.name = "Intel 82865G (865G GMCH) SVGA controller",
+		.driver = &agp_i810_i865_driver
+	},
+	{
+		.devid = 0x25828086,
+		.name = "Intel 82915G (915G GMCH) SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x258A8086,
+		.name = "Intel E7221 SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x25928086,
+		.name = "Intel 82915GM (915GM GMCH) SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x27728086,
+		.name = "Intel 82945G (945G GMCH) SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x27A28086,
+		.name = "Intel 82945GM (945GM GMCH) SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x27AE8086,
+		.name = "Intel 945GME SVGA controller",
+		.driver = &agp_i810_i915_driver
+	},
+	{
+		.devid = 0x29728086,
+		.name = "Intel 946GZ SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x29828086,
+		.name = "Intel G965 SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x29928086,
+		.name = "Intel Q965 SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x29A28086,
+		.name = "Intel G965 SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x29B28086,
+		.name = "Intel Q35 SVGA controller",
+		.driver = &agp_i810_g33_driver
+	},
+	{
+		.devid = 0x29C28086,
+		.name = "Intel G33 SVGA controller",
+		.driver = &agp_i810_g33_driver
+	},
+	{
+		.devid = 0x29D28086,
+		.name = "Intel Q33 SVGA controller",
+		.driver = &agp_i810_g33_driver
+	},
+	{
+		.devid = 0xA0018086,
+		.name = "Intel Pineview SVGA controller",
+		.driver = &agp_i810_igd_driver
+	},
+	{
+		.devid = 0xA0118086,
+		.name = "Intel Pineview (M) SVGA controller",
+		.driver = &agp_i810_igd_driver
+	},
+	{
+		.devid = 0x2A028086,
+		.name = "Intel GM965 SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x2A128086,
+		.name = "Intel GME965 SVGA controller",
+		.driver = &agp_i810_g965_driver
+	},
+	{
+		.devid = 0x2A428086,
+		.name = "Intel GM45 SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x2E028086,
+		.name = "Intel Eaglelake SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x2E128086,
+		.name = "Intel Q45 SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x2E228086,
+		.name = "Intel G45 SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x2E328086,
+		.name = "Intel G41 SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x00428086,
+		.name = "Intel Ironlake (D) SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x00468086,
+		.name = "Intel Ironlake (M) SVGA controller",
+		.driver = &agp_i810_g4x_driver
+	},
+	{
+		.devid = 0x01028086,
+		.name = "SandyBridge desktop GT1 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01128086,
+		.name = "SandyBridge desktop GT2 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01228086,
+		.name = "SandyBridge desktop GT2+ IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01068086,
+		.name = "SandyBridge mobile GT1 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01168086,
+		.name = "SandyBridge mobile GT2 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01268086,
+		.name = "SandyBridge mobile GT2+ IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x010a8086,
+		.name = "SandyBridge server IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01528086,
+		.name = "IvyBridge desktop GT1 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01628086,
+		.name = "IvyBridge desktop GT2 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01568086,
+		.name = "IvyBridge mobile GT1 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x01668086,
+		.name = "IvyBridge mobile GT2 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0x015a8086,
+		.name = "IvyBridge server GT1 IG",
+		.driver = &agp_i810_sb_driver
+	},
+	{
+		.devid = 0,
+	}
 };
 
 static const struct agp_i810_match*
@@ -196,17 +741,17 @@
 
 	if (pci_get_class(dev) != PCIC_DISPLAY
 	    || pci_get_subclass(dev) != PCIS_DISPLAY_VGA)
-		return NULL;
+		return (NULL);
 
 	devid = pci_get_devid(dev);
 	for (i = 0; agp_i810_matches[i].devid != 0; i++) {
 		if (agp_i810_matches[i].devid == devid)
-		    break;
+			break;
 	}
 	if (agp_i810_matches[i].devid == 0)
-		return NULL;
+		return (NULL);
 	else
-		return &agp_i810_matches[i];
+		return (&agp_i810_matches[i]);
 }
 
 /*
@@ -215,28 +760,8 @@
 static device_t
 agp_i810_find_bridge(device_t dev)
 {
-	device_t *children, child;
-	int nchildren, i;
-	u_int32_t devid;
-	const struct agp_i810_match *match;
-  
-	match = agp_i810_match(dev);
-	devid = match->devid - match->bridge_offset;
 
-	if (device_get_children(device_get_parent(device_get_parent(dev)),
-	    &children, &nchildren))
-		return 0;
-
-	for (i = 0; i < nchildren; i++) {
-		child = children[i];
-
-		if (pci_get_devid(child) == devid) {
-			free(children, M_TEMP);
-			return child;
-		}
-	}
-	free(children, M_TEMP);
-	return 0;
+	return (pci_find_dbsf(0, 0, 0, 0));
 }
 
 static void
@@ -249,92 +774,116 @@
 }
 
 static int
+agp_i810_check_active(device_t bridge_dev)
+{
+	u_int8_t smram;
+
+	smram = pci_read_config(bridge_dev, AGP_I810_SMRAM, 1);
+	if ((smram & AGP_I810_SMRAM_GMS) == AGP_I810_SMRAM_GMS_DISABLED)
+		return (ENXIO);
+	return (0);
+}
+
+static int
+agp_i830_check_active(device_t bridge_dev)
+{
+	int gcc1;
+
+	gcc1 = pci_read_config(bridge_dev, AGP_I830_GCC1, 1);
+	if ((gcc1 & AGP_I830_GCC1_DEV2) == AGP_I830_GCC1_DEV2_DISABLED)
+		return (ENXIO);
+	return (0);
+}
+
+static int
+agp_i915_check_active(device_t bridge_dev)
+{
+	int deven;
+
+	deven = pci_read_config(bridge_dev, AGP_I915_DEVEN, 4);
+	if ((deven & AGP_I915_DEVEN_D2F0) == AGP_I915_DEVEN_D2F0_DISABLED)
+		return (ENXIO);
+	return (0);
+}
+
+static int
+agp_sb_check_active(device_t bridge_dev)
+{
+	int deven;
+
+	deven = pci_read_config(bridge_dev, AGP_I915_DEVEN, 4);
+	if ((deven & AGP_SB_DEVEN_D2EN) == AGP_SB_DEVEN_D2EN_DISABLED)
+		return (ENXIO);
+	return (0);
+}
+
+static void
+agp_82852_set_desc(device_t dev, const struct agp_i810_match *match)
+{
+
+	switch (pci_read_config(dev, AGP_I85X_CAPID, 1)) {
+	case AGP_I855_GME:
+		device_set_desc(dev,
+		    "Intel 82855GME (855GME GMCH) SVGA controller");
+		break;
+	case AGP_I855_GM:
+		device_set_desc(dev,
+		    "Intel 82855GM (855GM GMCH) SVGA controller");
+		break;
+	case AGP_I852_GME:
+		device_set_desc(dev,
+		    "Intel 82852GME (852GME GMCH) SVGA controller");
+		break;
+	case AGP_I852_GM:
+		device_set_desc(dev,
+		    "Intel 82852GM (852GM GMCH) SVGA controller");
+		break;
+	default:
+		device_set_desc(dev,
+		    "Intel 8285xM (85xGM GMCH) SVGA controller");
+		break;
+	}
+}
+
+static void
+agp_i810_set_desc(device_t dev, const struct agp_i810_match *match)
+{
+
+	device_set_desc(dev, match->name);
+}
+
+static int
 agp_i810_probe(device_t dev)
 {
 	device_t bdev;
 	const struct agp_i810_match *match;
-	u_int8_t smram;
-	int gcc1, deven;
+	int err;
 
 	if (resource_disabled("agp", device_get_unit(dev)))
 		return (ENXIO);
 	match = agp_i810_match(dev);
 	if (match == NULL)
-		return ENXIO;
+		return (ENXIO);
 
 	bdev = agp_i810_find_bridge(dev);
-	if (!bdev) {
+	if (bdev == NULL) {
 		if (bootverbose)
 			printf("I810: can't find bridge device\n");
-		return ENXIO;
+		return (ENXIO);
 	}
 
 	/*
 	 * checking whether internal graphics device has been activated.
 	 */
-	switch (match->chiptype) {
-	case CHIP_I810:
-		smram = pci_read_config(bdev, AGP_I810_SMRAM, 1);
-		if ((smram & AGP_I810_SMRAM_GMS) ==
-		    AGP_I810_SMRAM_GMS_DISABLED) {
-			if (bootverbose)
-				printf("I810: disabled, not probing\n");
-			return ENXIO;
-		}
-		break;
-	case CHIP_I830:
-	case CHIP_I855:
-		gcc1 = pci_read_config(bdev, AGP_I830_GCC1, 1);
-		if ((gcc1 & AGP_I830_GCC1_DEV2) ==
-		    AGP_I830_GCC1_DEV2_DISABLED) {
-			if (bootverbose)
-				printf("I830: disabled, not probing\n");
-			return ENXIO;
-		}
-		break;
-	case CHIP_I915:
-	case CHIP_I965:
-	case CHIP_G33:
-	case CHIP_IGD:
-	case CHIP_G4X:
-		deven = pci_read_config(bdev, AGP_I915_DEVEN, 4);
-		if ((deven & AGP_I915_DEVEN_D2F0) ==
-		    AGP_I915_DEVEN_D2F0_DISABLED) {
-			if (bootverbose)
-				printf("I915: disabled, not probing\n");
-			return ENXIO;
-		}
-		break;
+	err = match->driver->check_active(bdev);
+	if (err != 0) {
+		if (bootverbose)
+			printf("i810: disabled, not probing\n");
+		return (err);
 	}
 
-	if (match->devid == 0x35828086) {
-		switch (pci_read_config(dev, AGP_I85X_CAPID, 1)) {
-		case AGP_I855_GME:
-			device_set_desc(dev,
-			    "Intel 82855GME (855GME GMCH) SVGA controller");
-			break;
-		case AGP_I855_GM:
-			device_set_desc(dev,
-			    "Intel 82855GM (855GM GMCH) SVGA controller");
-			break;
-		case AGP_I852_GME:
-			device_set_desc(dev,
-			    "Intel 82852GME (852GME GMCH) SVGA controller");
-			break;
-		case AGP_I852_GM:
-			device_set_desc(dev,
-			    "Intel 82852GM (852GM GMCH) SVGA controller");
-			break;
-		default:
-			device_set_desc(dev,
-			    "Intel 8285xM (85xGM GMCH) SVGA controller");
-			break;
-		}
-	} else {
-		device_set_desc(dev, match->name);
-	}
-
-	return BUS_PROBE_DEFAULT;
+	match->driver->set_desc(dev, match);
+	return (BUS_PROBE_DEFAULT);
 }
 
 static void
@@ -344,391 +893,641 @@
 
 	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
+	device_printf(dev, "AGP_I810_MISCC: 0x%04x\n",
+	    pci_read_config(sc->bdev, AGP_I810_MISCC, 2));
+}
 
-	switch (sc->chiptype) {
-	case CHIP_I810:
-		device_printf(dev, "AGP_I810_MISCC: 0x%04x\n",
-		    pci_read_config(sc->bdev, AGP_I810_MISCC, 2));
+static void
+agp_i830_dump_regs(device_t dev)
+{
+	struct agp_i810_softc *sc = device_get_softc(dev);
+
+	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
+	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
+	device_printf(dev, "AGP_I830_GCC1: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I830_GCC1, 1));
+}
+
+static void
+agp_i855_dump_regs(device_t dev)
+{
+	struct agp_i810_softc *sc = device_get_softc(dev);
+
+	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
+	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
+	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
+}
+
+static void
+agp_i915_dump_regs(device_t dev)
+{
+	struct agp_i810_softc *sc = device_get_softc(dev);
+
+	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
+	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
+	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
+	device_printf(dev, "AGP_I915_MSAC: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I915_MSAC, 1));
+}
+
+static void
+agp_i965_dump_regs(device_t dev)
+{
+	struct agp_i810_softc *sc = device_get_softc(dev);
+
+	device_printf(dev, "AGP_I965_PGTBL_CTL2: %08x\n",
+	    bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2));
+	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
+	device_printf(dev, "AGP_I965_MSAC: 0x%02x\n",
+	    pci_read_config(sc->bdev, AGP_I965_MSAC, 1));
+}
+
+static void
+agp_sb_dump_regs(device_t dev)
+{
+	struct agp_i810_softc *sc = device_get_softc(dev);
+
+	device_printf(dev, "AGP_SNB_GFX_MODE: %08x\n",
+	    bus_read_4(sc->sc_res[0], AGP_SNB_GFX_MODE));
+	device_printf(dev, "AGP_SNB_GCC1: 0x%04x\n",
+	    pci_read_config(sc->bdev, AGP_SNB_GCC1, 2));
+}
+
+static int
+agp_i810_get_stolen_size(device_t dev)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	sc->stolen = 0;
+	sc->stolen_size = 0;
+	return (0);
+}
+
+static int
+agp_i830_get_stolen_size(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	unsigned int gcc1;
+
+	sc = device_get_softc(dev);
+
+	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 1);
+	switch (gcc1 & AGP_I830_GCC1_GMS) {
+	case AGP_I830_GCC1_GMS_STOLEN_512:
+		sc->stolen = (512 - 132) * 1024 / 4096;
+		sc->stolen_size = 512 * 1024;
 		break;
-	case CHIP_I830:
-		device_printf(dev, "AGP_I830_GCC1: 0x%02x\n",
-		    pci_read_config(sc->bdev, AGP_I830_GCC1, 1));
+	case AGP_I830_GCC1_GMS_STOLEN_1024: 
+		sc->stolen = (1024 - 132) * 1024 / 4096;
+		sc->stolen_size = 1024 * 1024;
 		break;
+	case AGP_I830_GCC1_GMS_STOLEN_8192: 
+		sc->stolen = (8192 - 132) * 1024 / 4096;
+		sc->stolen_size = 8192 * 1024;
+		break;
+	default:
+		sc->stolen = 0;
+		device_printf(dev,
+		    "unknown memory configuration, disabling (GCC1 %x)\n",
+		    gcc1);
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static int
+agp_i915_get_stolen_size(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	unsigned int gcc1, stolen, gtt_size;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * Stolen memory is set up at the beginning of the aperture by
+	 * the BIOS, consisting of the GATT followed by 4kb for the
+	 * BIOS display.
+	 */
+	switch (sc->match->driver->chiptype) {
 	case CHIP_I855:
-		device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
-		    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
+		gtt_size = 128;
 		break;
 	case CHIP_I915:
+		gtt_size = 256;
+		break;
 	case CHIP_I965:
+		switch (bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL) &
+			AGP_I810_PGTBL_SIZE_MASK) {
+		case AGP_I810_PGTBL_SIZE_128KB:
+			gtt_size = 128;
+			break;
+		case AGP_I810_PGTBL_SIZE_256KB:
+			gtt_size = 256;
+			break;
+		case AGP_I810_PGTBL_SIZE_512KB:
+			gtt_size = 512;
+			break;
+		case AGP_I965_PGTBL_SIZE_1MB:
+			gtt_size = 1024;
+			break;
+		case AGP_I965_PGTBL_SIZE_2MB:
+			gtt_size = 2048;
+			break;
+		case AGP_I965_PGTBL_SIZE_1_5MB:
+			gtt_size = 1024 + 512;
+			break;
+		default:
+			device_printf(dev, "Bad PGTBL size\n");
+			return (EINVAL);
+		}
+		break;
 	case CHIP_G33:
+		gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 2);
+		switch (gcc1 & AGP_G33_MGGC_GGMS_MASK) {
+		case AGP_G33_MGGC_GGMS_SIZE_1M:
+			gtt_size = 1024;
+			break;
+		case AGP_G33_MGGC_GGMS_SIZE_2M:
+			gtt_size = 2048;
+			break;
+		default:
+			device_printf(dev, "Bad PGTBL size\n");
+			return (EINVAL);
+		}
+		break;
 	case CHIP_IGD:
 	case CHIP_G4X:
-		device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
-		    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
-		device_printf(dev, "AGP_I915_MSAC: 0x%02x\n",
-		    pci_read_config(sc->bdev, AGP_I915_MSAC, 1));
+		gtt_size = 0;
+		break;
+	default:
+		device_printf(dev, "Bad chiptype\n");
+		return (EINVAL);
+	}
+
+	/* GCC1 is called MGGC on i915+ */
+	gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 1);
+	switch (gcc1 & AGP_I855_GCC1_GMS) {
+	case AGP_I855_GCC1_GMS_STOLEN_1M:
+		stolen = 1024;
+		break;
+	case AGP_I855_GCC1_GMS_STOLEN_4M:
+		stolen = 4 * 1024;
+		break;
+	case AGP_I855_GCC1_GMS_STOLEN_8M:
+		stolen = 8 * 1024;
+		break;
+	case AGP_I855_GCC1_GMS_STOLEN_16M:
+		stolen = 16 * 1024;
+		break;
+	case AGP_I855_GCC1_GMS_STOLEN_32M:
+		stolen = 32 * 1024;
+		break;
+	case AGP_I915_GCC1_GMS_STOLEN_48M:
+		stolen = sc->match->driver->gen > 2 ? 48 * 1024 : 0;
+		break;
+	case AGP_I915_GCC1_GMS_STOLEN_64M:
+		stolen = sc->match->driver->gen > 2 ? 64 * 1024 : 0;
+		break;
+	case AGP_G33_GCC1_GMS_STOLEN_128M:
+		stolen = sc->match->driver->gen > 2 ? 128 * 1024 : 0;
+		break;
+	case AGP_G33_GCC1_GMS_STOLEN_256M:
+		stolen = sc->match->driver->gen > 2 ? 256 * 1024 : 0;
+		break;
+	case AGP_G4X_GCC1_GMS_STOLEN_96M:
+		if (sc->match->driver->chiptype == CHIP_I965 ||
+		    sc->match->driver->chiptype == CHIP_G4X)
+			stolen = 96 * 1024;
+		else
+			stolen = 0;
+		break;
+	case AGP_G4X_GCC1_GMS_STOLEN_160M:
+		if (sc->match->driver->chiptype == CHIP_I965 ||
+		    sc->match->driver->chiptype == CHIP_G4X)
+			stolen = 160 * 1024;
+		else
+			stolen = 0;
+		break;
+	case AGP_G4X_GCC1_GMS_STOLEN_224M:
+		if (sc->match->driver->chiptype == CHIP_I965 ||
+		    sc->match->driver->chiptype == CHIP_G4X)
+			stolen = 224 * 1024;
+		else
+			stolen = 0;
+		break;
+	case AGP_G4X_GCC1_GMS_STOLEN_352M:
+		if (sc->match->driver->chiptype == CHIP_I965 ||
+		    sc->match->driver->chiptype == CHIP_G4X)
+			stolen = 352 * 1024;
+		else
+			stolen = 0;
+		break;
+	default:
+		device_printf(dev,
+		    "unknown memory configuration, disabling (GCC1 %x)\n",
+		    gcc1);
+		return (EINVAL);
+	}
+
+	gtt_size += 4;
+	sc->stolen_size = stolen * 1024;
+	sc->stolen = (stolen - gtt_size) * 1024 / 4096;
+
+	return (0);
+}
+
+static int
+agp_sb_get_stolen_size(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint16_t gmch_ctl;
+
+	sc = device_get_softc(dev);
+	gmch_ctl = pci_read_config(sc->bdev, AGP_SNB_GCC1, 2);
+	switch (gmch_ctl & AGP_SNB_GMCH_GMS_STOLEN_MASK) {
+	case AGP_SNB_GMCH_GMS_STOLEN_32M:
+		sc->stolen_size = 32 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_64M:
+		sc->stolen_size = 64 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_96M:
+		sc->stolen_size = 96 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_128M:
+		sc->stolen_size = 128 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_160M:
+		sc->stolen_size = 160 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_192M:
+		sc->stolen_size = 192 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_224M:
+		sc->stolen_size = 224 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_256M:
+		sc->stolen_size = 256 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_288M:
+		sc->stolen_size = 288 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_320M:
+		sc->stolen_size = 320 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_352M:
+		sc->stolen_size = 352 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_384M:
+		sc->stolen_size = 384 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_416M:
+		sc->stolen_size = 416 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_448M:
+		sc->stolen_size = 448 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_480M:
+		sc->stolen_size = 480 * 1024 * 1024;
+		break;
+	case AGP_SNB_GMCH_GMS_STOLEN_512M:
+		sc->stolen_size = 512 * 1024 * 1024;
 		break;
 	}
-	device_printf(dev, "Aperture resource size: %d bytes\n",
-	    AGP_GET_APERTURE(dev));
+	sc->stolen = (sc->stolen_size - 4) / 4096;
+	return (0);
+}
+
+static int
+agp_i810_get_gtt_mappable_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t ap;
+	uint16_t miscc;
+
+	sc = device_get_softc(dev);
+	miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2);
+	if ((miscc & AGP_I810_MISCC_WINSIZE) == AGP_I810_MISCC_WINSIZE_32)
+		ap = 32;
+	else
+		ap = 64;
+	sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT;
+	return (0);
+}
+
+static int
+agp_i830_get_gtt_mappable_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t ap;
+	uint16_t gmch_ctl;
+
+	sc = device_get_softc(dev);
+	gmch_ctl = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
+	if ((gmch_ctl & AGP_I830_GCC1_GMASIZE) == AGP_I830_GCC1_GMASIZE_64)
+		ap = 64;
+	else
+		ap = 128;
+	sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT;
+	return (0);
+}
+
+static int
+agp_i915_get_gtt_mappable_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t ap;
+
+	sc = device_get_softc(dev);
+	ap = AGP_GET_APERTURE(dev);
+	sc->gtt_mappable_entries = ap >> AGP_PAGE_SHIFT;
+	return (0);
+}
+
+static int
+agp_i810_get_gtt_total_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	sc->gtt_total_entries = sc->gtt_mappable_entries;
+	return (0);
+}
+
+static int
+agp_i965_get_gtt_total_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t pgetbl_ctl;
+	int error;
+
+	sc = device_get_softc(dev);
+	error = 0;
+	pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
+	switch (pgetbl_ctl & AGP_I810_PGTBL_SIZE_MASK) {
+	case AGP_I810_PGTBL_SIZE_128KB:
+		sc->gtt_total_entries = 128 * 1024 / 4;
+		break;
+	case AGP_I810_PGTBL_SIZE_256KB:
+		sc->gtt_total_entries = 256 * 1024 / 4;
+		break;
+	case AGP_I810_PGTBL_SIZE_512KB:
+		sc->gtt_total_entries = 512 * 1024 / 4;
+		break;
+	/* GTT pagetable sizes bigger than 512KB are not possible on G33! */
+	case AGP_I810_PGTBL_SIZE_1MB:
+		sc->gtt_total_entries = 1024 * 1024 / 4;
+		break;
+	case AGP_I810_PGTBL_SIZE_2MB:
+		sc->gtt_total_entries = 2 * 1024 * 1024 / 4;
+		break;
+	case AGP_I810_PGTBL_SIZE_1_5MB:
+		sc->gtt_total_entries = (1024 + 512) * 1024 / 4;
+		break;
+	default:
+		device_printf(dev, "Unknown page table size\n");
+		error = ENXIO;
+	}
+	return (error);
+}
+
+static void
+agp_gen5_adjust_pgtbl_size(device_t dev, uint32_t sz)
+{
+	struct agp_i810_softc *sc;
+	uint32_t pgetbl_ctl, pgetbl_ctl2;
+
+	sc = device_get_softc(dev);
+
+	/* Disable per-process page table. */
+	pgetbl_ctl2 = bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2);
+	pgetbl_ctl2 &= ~AGP_I810_PGTBL_ENABLED;
+	bus_write_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2, pgetbl_ctl2);
+
+	/* Write the new ggtt size. */
+	pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
+	pgetbl_ctl &= ~AGP_I810_PGTBL_SIZE_MASK;
+	pgetbl_ctl |= sz;
+	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgetbl_ctl);
+}
+
+static int
+agp_gen5_get_gtt_total_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint16_t gcc1;
+
+	sc = device_get_softc(dev);
+
+	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
+	switch (gcc1 & AGP_G4x_GCC1_SIZE_MASK) {
+	case AGP_G4x_GCC1_SIZE_1M:
+	case AGP_G4x_GCC1_SIZE_VT_1M:
+		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1MB);
+		break;
+	case AGP_G4x_GCC1_SIZE_VT_1_5M:
+		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1_5MB);
+		break;
+	case AGP_G4x_GCC1_SIZE_2M:
+	case AGP_G4x_GCC1_SIZE_VT_2M:
+		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_2MB);
+		break;
+	default:
+		device_printf(dev, "Unknown page table size\n");
+		return (ENXIO);
+	}
+
+	return (agp_i965_get_gtt_total_entries(dev));
+}
+
+static int
+agp_sb_get_gtt_total_entries(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint16_t gcc1;
+
+	sc = device_get_softc(dev);
+
+	gcc1 = pci_read_config(sc->bdev, AGP_SNB_GCC1, 2);
+	switch (gcc1 & AGP_SNB_GTT_SIZE_MASK) {
+	default:
+	case AGP_SNB_GTT_SIZE_0M:
+		printf("Bad GTT size mask: 0x%04x\n", gcc1);
+		return (ENXIO);
+	case AGP_SNB_GTT_SIZE_1M:
+		sc->gtt_total_entries = 1024 * 1024 / 4;
+		break;
+	case AGP_SNB_GTT_SIZE_2M:
+		sc->gtt_total_entries = 2 * 1024 * 1024 / 4;
+		break;
+	}
+	return (0);
+}
+
+static int
+agp_i810_install_gatt(device_t dev)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/* Some i810s have on-chip memory called dcache. */
+	if ((bus_read_1(sc->sc_res[0], AGP_I810_DRT) & AGP_I810_DRT_POPULATED)
+	    != 0)
+		sc->dcache_size = 4 * 1024 * 1024;
+	else
+		sc->dcache_size = 0;
+
+	/* According to the specs the gatt on the i810 must be 64k. */
+	sc->gatt->ag_virtual = contigmalloc(64 * 1024, M_AGP, 0, 0, ~0,
+	    PAGE_SIZE, 0);
+	if (sc->gatt->ag_virtual == NULL) {
+		if (bootverbose)
+			device_printf(dev, "contiguous allocation failed\n");
+		return (ENOMEM);
+	}
+
+	bzero(sc->gatt->ag_virtual, sc->gatt->ag_entries * sizeof(u_int32_t));
+	sc->gatt->ag_physical = vtophys((vm_offset_t)sc->gatt->ag_virtual);
+	agp_flush_cache();
+	/* Install the GATT. */
+	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL,
+	    sc->gatt->ag_physical | 1);
+	return (0);
+}
+
+static int
+agp_i830_install_gatt(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t pgtblctl;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * The i830 automatically initializes the 128k gatt on boot.
+	 * GATT address is already in there, make sure it's enabled.
+	 */
+	pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
+	pgtblctl |= 1;
+	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
+	
+	sc->gatt->ag_physical = pgtblctl & ~1;
+	return (0);
 }
 
 static int
 agp_i810_attach(device_t dev)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
-	struct agp_gatt *gatt;
-	const struct agp_i810_match *match;
+	struct agp_i810_softc *sc;
 	int error;
 
+	sc = device_get_softc(dev);
 	sc->bdev = agp_i810_find_bridge(dev);
-	if (!sc->bdev)
-		return ENOENT;
+	if (sc->bdev == NULL)
+		return (ENOENT);
 
-	match = agp_i810_match(dev);
-	sc->chiptype = match->chiptype;
+	sc->match = agp_i810_match(dev);
 
-	switch (sc->chiptype) {
-	case CHIP_I810:
-	case CHIP_I830:
-	case CHIP_I855:
-		sc->sc_res_spec = agp_i810_res_spec;
-		agp_set_aperture_resource(dev, AGP_APBASE);
-		break;
-	case CHIP_I915:
-	case CHIP_G33:
-	case CHIP_IGD:
-		sc->sc_res_spec = agp_i915_res_spec;
-		agp_set_aperture_resource(dev, AGP_I915_GMADR);
-		break;
-	case CHIP_I965:
-	case CHIP_G4X:
-		sc->sc_res_spec = agp_i965_res_spec;
-		agp_set_aperture_resource(dev, AGP_I915_GMADR);
-		break;
+	agp_set_aperture_resource(dev, sc->match->driver->gen <= 2 ?
+	    AGP_APBASE : AGP_I915_GMADR);
+	error = agp_generic_attach(dev);
+	if (error)
+		return (error);
+
+	if (ptoa((vm_paddr_t)Maxmem) >
+	    (1ULL << sc->match->driver->busdma_addr_mask_sz) - 1) {
+		device_printf(dev, "agp_i810 does not support physical "
+		    "memory above %ju.\n", (uintmax_t)(1ULL <<
+		    sc->match->driver->busdma_addr_mask_sz) - 1);
+		return (ENOENT);
 	}
 
-	error = agp_generic_attach(dev);
-	if (error)
-		return error;
-
-	if (sc->chiptype != CHIP_I965 && sc->chiptype != CHIP_G33 &&
-	    sc->chiptype != CHIP_IGD && sc->chiptype != CHIP_G4X &&
-	    ptoa((vm_paddr_t)Maxmem) > 0xfffffffful)
-	{
-		device_printf(dev, "agp_i810.c does not support physical "
-		    "memory above 4GB.\n");
-		return ENOENT;
-	}
-
-	if (bus_alloc_resources(dev, sc->sc_res_spec, sc->sc_res)) {
+	if (bus_alloc_resources(dev, sc->match->driver->res_spec, sc->sc_res)) {
 		agp_generic_detach(dev);
-		return ENODEV;
+		return (ENODEV);
 	}
 
 	sc->initial_aperture = AGP_GET_APERTURE(dev);
+	sc->gatt = malloc(sizeof(struct agp_gatt), M_AGP, M_WAITOK);
+	sc->gatt->ag_entries = AGP_GET_APERTURE(dev) >> AGP_PAGE_SHIFT;
 
-	gatt = malloc( sizeof(struct agp_gatt), M_AGP, M_NOWAIT);
-	if (!gatt) {
-		bus_release_resources(dev, sc->sc_res_spec, sc->sc_res);
- 		agp_generic_detach(dev);
- 		return ENOMEM;
-	}
-	sc->gatt = gatt;
-
-	gatt->ag_entries = AGP_GET_APERTURE(dev) >> AGP_PAGE_SHIFT;
-
-	if ( sc->chiptype == CHIP_I810 ) {
-		/* Some i810s have on-chip memory called dcache */
-		if (bus_read_1(sc->sc_res[0], AGP_I810_DRT) &
-		    AGP_I810_DRT_POPULATED)
-			sc->dcache_size = 4 * 1024 * 1024;
-		else
-			sc->dcache_size = 0;
-
-		/* According to the specs the gatt on the i810 must be 64k */
-		gatt->ag_virtual = contigmalloc( 64 * 1024, M_AGP, 0, 
-					0, ~0, PAGE_SIZE, 0);
-		if (!gatt->ag_virtual) {
-			if (bootverbose)
-				device_printf(dev, "contiguous allocation failed\n");
-			bus_release_resources(dev, sc->sc_res_spec,
-			    sc->sc_res);
-			free(gatt, M_AGP);
-			agp_generic_detach(dev);
-			return ENOMEM;
-		}
-		bzero(gatt->ag_virtual, gatt->ag_entries * sizeof(u_int32_t));
-	
-		gatt->ag_physical = vtophys((vm_offset_t) gatt->ag_virtual);
-		agp_flush_cache();
-		/* Install the GATT. */
-		bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL,
-		    gatt->ag_physical | 1);
-	} else if ( sc->chiptype == CHIP_I830 ) {
-		/* The i830 automatically initializes the 128k gatt on boot. */
-		unsigned int gcc1, pgtblctl;
-		
-		gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 1);
-		switch (gcc1 & AGP_I830_GCC1_GMS) {
-			case AGP_I830_GCC1_GMS_STOLEN_512:
-				sc->stolen = (512 - 132) * 1024 / 4096;
-				break;
-			case AGP_I830_GCC1_GMS_STOLEN_1024: 
-				sc->stolen = (1024 - 132) * 1024 / 4096;
-				break;
-			case AGP_I830_GCC1_GMS_STOLEN_8192: 
-				sc->stolen = (8192 - 132) * 1024 / 4096;
-				break;
-			default:
-				sc->stolen = 0;
-				device_printf(dev, "unknown memory configuration, disabling\n");
-				bus_release_resources(dev, sc->sc_res_spec,
-				    sc->sc_res);
-				free(gatt, M_AGP);
-				agp_generic_detach(dev);
-				return EINVAL;
-		}
-
-		/* GATT address is already in there, make sure it's enabled */
-		pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
-		pgtblctl |= 1;
-		bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
-
-		gatt->ag_physical = pgtblctl & ~1;
-	} else if (sc->chiptype == CHIP_I855 || sc->chiptype == CHIP_I915 ||
-	    sc->chiptype == CHIP_I965 || sc->chiptype == CHIP_G33 ||
-	    sc->chiptype == CHIP_IGD || sc->chiptype == CHIP_G4X) {
-		unsigned int gcc1, pgtblctl, stolen, gtt_size;
-
-		/* Stolen memory is set up at the beginning of the aperture by
-		 * the BIOS, consisting of the GATT followed by 4kb for the
-		 * BIOS display.
-		 */
-		switch (sc->chiptype) {
-		case CHIP_I855:
-			gtt_size = 128;
-			break;
-		case CHIP_I915:
-			gtt_size = 256;
-			break;
-		case CHIP_I965:
-			switch (bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL) &
-			    AGP_I810_PGTBL_SIZE_MASK) {
-			case AGP_I810_PGTBL_SIZE_128KB:
-				gtt_size = 128;
-				break;
-			case AGP_I810_PGTBL_SIZE_256KB:
-				gtt_size = 256;
-				break;
-			case AGP_I810_PGTBL_SIZE_512KB:
-				gtt_size = 512;
-				break;
-			case AGP_I965_PGTBL_SIZE_1MB:
-				gtt_size = 1024;
-				break;
-			case AGP_I965_PGTBL_SIZE_2MB:
-				gtt_size = 2048;
-				break;
-			case AGP_I965_PGTBL_SIZE_1_5MB:
-				gtt_size = 1024 + 512;
-				break;
-			default:
-				device_printf(dev, "Bad PGTBL size\n");
-				bus_release_resources(dev, sc->sc_res_spec,
-				    sc->sc_res);
-				free(gatt, M_AGP);
-				agp_generic_detach(dev);
-				return EINVAL;
-			}
-			break;
-		case CHIP_G33:
-			gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 2);
-			switch (gcc1 & AGP_G33_MGGC_GGMS_MASK) {
-			case AGP_G33_MGGC_GGMS_SIZE_1M:
-				gtt_size = 1024;
-				break;
-			case AGP_G33_MGGC_GGMS_SIZE_2M:
-				gtt_size = 2048;
-				break;
-			default:
-				device_printf(dev, "Bad PGTBL size\n");
-				bus_release_resources(dev, sc->sc_res_spec,
-				    sc->sc_res);
-				free(gatt, M_AGP);
-				agp_generic_detach(dev);
-				return EINVAL;
-			}
-			break;
-		case CHIP_IGD:
-		case CHIP_G4X:
-			gtt_size = 0;
-			break;
-		default:
-			device_printf(dev, "Bad chiptype\n");
-			bus_release_resources(dev, sc->sc_res_spec,
-			    sc->sc_res);
-			free(gatt, M_AGP);
-			agp_generic_detach(dev);
-			return EINVAL;
-		}
-
-		/* GCC1 is called MGGC on i915+ */
-		gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 1);
-		switch (gcc1 & AGP_I855_GCC1_GMS) {
-		case AGP_I855_GCC1_GMS_STOLEN_1M:
-			stolen = 1024;
-			break;
-		case AGP_I855_GCC1_GMS_STOLEN_4M:
-			stolen = 4 * 1024;
-			break;
-		case AGP_I855_GCC1_GMS_STOLEN_8M:
-			stolen = 8 * 1024;
-			break;
-		case AGP_I855_GCC1_GMS_STOLEN_16M:
-			stolen = 16 * 1024;
-			break;
-		case AGP_I855_GCC1_GMS_STOLEN_32M:
-			stolen = 32 * 1024;
-			break;
-		case AGP_I915_GCC1_GMS_STOLEN_48M:
-			if (sc->chiptype == CHIP_I915 ||
-			    sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G33 ||
-			    sc->chiptype == CHIP_IGD ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 48 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_I915_GCC1_GMS_STOLEN_64M:
-			if (sc->chiptype == CHIP_I915 ||
-			    sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G33 ||
-			    sc->chiptype == CHIP_IGD ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 64 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G33_GCC1_GMS_STOLEN_128M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G33 ||
-			    sc->chiptype == CHIP_IGD ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 128 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G33_GCC1_GMS_STOLEN_256M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G33 ||
-			    sc->chiptype == CHIP_IGD ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 256 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G4X_GCC1_GMS_STOLEN_96M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 96 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G4X_GCC1_GMS_STOLEN_160M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 160 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G4X_GCC1_GMS_STOLEN_224M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 224 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		case AGP_G4X_GCC1_GMS_STOLEN_352M:
-			if (sc->chiptype == CHIP_I965 ||
-			    sc->chiptype == CHIP_G4X) {
-				stolen = 352 * 1024;
-			} else {
-				stolen = 0;
-			}
-			break;
-		default:
-			device_printf(dev, "unknown memory configuration, "
-			    "disabling\n");
-			bus_release_resources(dev, sc->sc_res_spec,
-			    sc->sc_res);
-			free(gatt, M_AGP);
-			agp_generic_detach(dev);
-			return EINVAL;
-		}
-
-		gtt_size += 4;
-
-		sc->stolen = (stolen - gtt_size) * 1024 / 4096;
-
-		/* GATT address is already in there, make sure it's enabled */
-		pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
-		pgtblctl |= 1;
-		bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
-
-		gatt->ag_physical = pgtblctl & ~1;
+	if ((error = sc->match->driver->get_stolen_size(dev)) != 0 ||
+	    (error = sc->match->driver->install_gatt(dev)) != 0 ||
+	    (error = sc->match->driver->get_gtt_mappable_entries(dev)) != 0 ||
+	    (error = sc->match->driver->get_gtt_total_entries(dev)) != 0 ||
+	    (error = sc->match->driver->chipset_flush_setup(dev)) != 0) {
+		bus_release_resources(dev, sc->match->driver->res_spec,
+		    sc->sc_res);
+		free(sc->gatt, M_AGP);
+		agp_generic_detach(dev);
+		return (error);
 	}
 
+	intel_agp = dev;
 	device_printf(dev, "aperture size is %dM",
 	    sc->initial_aperture / 1024 / 1024);
 	if (sc->stolen > 0)
 		printf(", detected %dk stolen memory\n", sc->stolen * 4);
 	else
 		printf("\n");
+	if (bootverbose) {
+		sc->match->driver->dump_regs(dev);
+		device_printf(dev, "Mappable GTT entries: %d\n",
+		    sc->gtt_mappable_entries);
+		device_printf(dev, "Total GTT entries: %d\n",
+		    sc->gtt_total_entries);
+	}
+	return (0);
+}
 
-	if (0)
-		agp_i810_dump_regs(dev);
+static void
+agp_i810_deinstall_gatt(device_t dev)
+{
+	struct agp_i810_softc *sc;
 
-	return 0;
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0);
+	contigfree(sc->gatt->ag_virtual, 64 * 1024, M_AGP);
+}
+
+static void
+agp_i830_deinstall_gatt(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	unsigned int pgtblctl;
+
+	sc = device_get_softc(dev);
+	pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
+	pgtblctl &= ~1;
+	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
 }
 
 static int
 agp_i810_detach(device_t dev)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
 
+	sc = device_get_softc(dev);
 	agp_free_cdev(dev);
 
 	/* Clear the GATT base. */
-	if ( sc->chiptype == CHIP_I810 ) {
-		bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0);
-	} else {
-		unsigned int pgtblctl;
-		pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
-		pgtblctl &= ~1;
-		bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
-	}
+	sc->match->driver->deinstall_gatt(dev);
+
+	sc->match->driver->chipset_flush_teardown(dev);
 
 	/* Put the aperture back the way it started. */
 	AGP_SET_APERTURE(dev, sc->initial_aperture);
 
-	if ( sc->chiptype == CHIP_I810 ) {
-		contigfree(sc->gatt->ag_virtual, 64 * 1024, M_AGP);
-	}
 	free(sc->gatt, M_AGP);
-
-	bus_release_resources(dev, sc->sc_res_spec, sc->sc_res);
+	bus_release_resources(dev, sc->match->driver->res_spec, sc->sc_res);
 	agp_free_res(dev);
 
-	return 0;
+	return (0);
 }
 
 static int
@@ -758,143 +1557,338 @@
 static int
 agp_i810_set_aperture(device_t dev, u_int32_t aperture)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
-	u_int16_t miscc, gcc1;
+	struct agp_i810_softc *sc;
+	u_int16_t miscc;
 
-	switch (sc->chiptype) {
-	case CHIP_I810:
-		/*
-		 * Double check for sanity.
-		 */
-		if (aperture != 32 * 1024 * 1024 && aperture != 64 * 1024 * 1024) {
-			device_printf(dev, "bad aperture size %d\n", aperture);
-			return EINVAL;
-		}
-
-		miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2);
-		miscc &= ~AGP_I810_MISCC_WINSIZE;
-		if (aperture == 32 * 1024 * 1024)
-			miscc |= AGP_I810_MISCC_WINSIZE_32;
-		else
-			miscc |= AGP_I810_MISCC_WINSIZE_64;
-	
-		pci_write_config(sc->bdev, AGP_I810_MISCC, miscc, 2);
-		break;
-	case CHIP_I830:
-		if (aperture != 64 * 1024 * 1024 &&
-		    aperture != 128 * 1024 * 1024) {
-			device_printf(dev, "bad aperture size %d\n", aperture);
-			return EINVAL;
-		}
-		gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
-		gcc1 &= ~AGP_I830_GCC1_GMASIZE;
-		if (aperture == 64 * 1024 * 1024)
-			gcc1 |= AGP_I830_GCC1_GMASIZE_64;
-		else
-			gcc1 |= AGP_I830_GCC1_GMASIZE_128;
-
-		pci_write_config(sc->bdev, AGP_I830_GCC1, gcc1, 2);
-		break;
-	case CHIP_I855:
-	case CHIP_I915:
-	case CHIP_I965:
-	case CHIP_G33:
-	case CHIP_IGD:
-	case CHIP_G4X:
-		return agp_generic_set_aperture(dev, aperture);
+	sc = device_get_softc(dev);
+	/*
+	 * Double check for sanity.
+	 */
+	if (aperture != 32 * 1024 * 1024 && aperture != 64 * 1024 * 1024) {
+		device_printf(dev, "bad aperture size %d\n", aperture);
+		return (EINVAL);
 	}
 
-	return 0;
+	miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2);
+	miscc &= ~AGP_I810_MISCC_WINSIZE;
+	if (aperture == 32 * 1024 * 1024)
+		miscc |= AGP_I810_MISCC_WINSIZE_32;
+	else
+		miscc |= AGP_I810_MISCC_WINSIZE_64;
+	
+	pci_write_config(sc->bdev, AGP_I810_MISCC, miscc, 2);
+	return (0);
+}
+
+static int
+agp_i830_set_aperture(device_t dev, u_int32_t aperture)
+{
+	struct agp_i810_softc *sc;
+	u_int16_t gcc1;
+
+	sc = device_get_softc(dev);
+
+	if (aperture != 64 * 1024 * 1024 &&
+	    aperture != 128 * 1024 * 1024) {
+		device_printf(dev, "bad aperture size %d\n", aperture);
+		return (EINVAL);
+	}
+	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
+	gcc1 &= ~AGP_I830_GCC1_GMASIZE;
+	if (aperture == 64 * 1024 * 1024)
+		gcc1 |= AGP_I830_GCC1_GMASIZE_64;
+	else
+		gcc1 |= AGP_I830_GCC1_GMASIZE_128;
+
+	pci_write_config(sc->bdev, AGP_I830_GCC1, gcc1, 2);
+	return (0);
+}
+
+static int
+agp_i915_set_aperture(device_t dev, u_int32_t aperture)
+{
+
+	return (agp_generic_set_aperture(dev, aperture));
+}
+
+static int
+agp_i810_method_set_aperture(device_t dev, u_int32_t aperture)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	return (sc->match->driver->set_aperture(dev, aperture));
 }
 
 /**
- * Writes a GTT entry mapping the page at the given offset from the beginning
- * of the aperture to the given physical address.
+ * Writes a GTT entry mapping the page at the given offset from the
+ * beginning of the aperture to the given physical address.  Setup the
+ * caching mode according to flags.
+ *
+ * For gen 1, 2 and 3, GTT start is located at AGP_I810_GTT offset
+ * from corresponding BAR start. For gen 4, offset is 512KB +
+ * AGP_I810_GTT, for gen 5 and 6 it is 2MB + AGP_I810_GTT.
+ *
+ * Also, the bits of the physical page address above 4GB needs to be
+ * placed into bits 40-32 of PTE.
  */
 static void
-agp_i810_write_gtt_entry(device_t dev, int offset, vm_offset_t physical,
-    int enabled)
+agp_i810_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
-	u_int32_t pte;
+	uint32_t pte;
 
-	pte = (u_int32_t)physical | 1;
-	if (sc->chiptype == CHIP_I965 || sc->chiptype == CHIP_G33 ||
-	    sc->chiptype == CHIP_IGD || sc->chiptype == CHIP_G4X) {
-		pte |= (physical & 0x0000000f00000000ull) >> 28;
-	} else {
-		/* If we do actually have memory above 4GB on an older system,
-		 * crash cleanly rather than scribble on system memory,
-		 * so we know we need to fix it.
-		 */
-		KASSERT((pte & 0x0000000f00000000ull) == 0,
-		    (">4GB physical address in agp"));
-	}
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	if (flags == AGP_DCACHE_MEMORY)
+		pte |= I810_PTE_LOCAL;
+	else if (flags == AGP_USER_CACHED_MEMORY)
+		pte |= I830_PTE_SYSTEM_CACHED;
+	agp_i810_write_gtt(dev, index, pte);
+}
 
-	switch (sc->chiptype) {
-	case CHIP_I810:
-	case CHIP_I830:
-	case CHIP_I855:
-		bus_write_4(sc->sc_res[0],
-		    AGP_I810_GTT + (offset >> AGP_PAGE_SHIFT) * 4, pte);
-		break;
-	case CHIP_I915:
-	case CHIP_G33:
-	case CHIP_IGD:
-		bus_write_4(sc->sc_res[1],
-		    (offset >> AGP_PAGE_SHIFT) * 4, pte);
-		break;
-	case CHIP_I965:
-		bus_write_4(sc->sc_res[0],
-		    (offset >> AGP_PAGE_SHIFT) * 4 + (512 * 1024), pte);
-		break;
-	case CHIP_G4X:
-		bus_write_4(sc->sc_res[0],
-		    (offset >> AGP_PAGE_SHIFT) * 4 + (2 * 1024 * 1024), pte);
-		break;
-	}
+static void
+agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[0], AGP_I810_GTT + index * 4, pte);
+	CTR2(KTR_AGP_I810, "810_pte %x %x", index, pte);
+}
+
+static void
+agp_i830_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
+{
+	uint32_t pte;
+
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	if (flags == AGP_USER_CACHED_MEMORY)
+		pte |= I830_PTE_SYSTEM_CACHED;
+	agp_i810_write_gtt(dev, index, pte);
+}
+
+static void
+agp_i915_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
+{
+	uint32_t pte;
+
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	if (flags == AGP_USER_CACHED_MEMORY)
+		pte |= I830_PTE_SYSTEM_CACHED;
+	pte |= (physical & 0x0000000f00000000ull) >> 28;
+	agp_i915_write_gtt(dev, index, pte);
+}
+
+static void
+agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[1], index * 4, pte);
+	CTR2(KTR_AGP_I810, "915_pte %x %x", index, pte);
+}
+
+static void
+agp_i965_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
+{
+	uint32_t pte;
+
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	if (flags == AGP_USER_CACHED_MEMORY)
+		pte |= I830_PTE_SYSTEM_CACHED;
+	pte |= (physical & 0x0000000f00000000ull) >> 28;
+	agp_i965_write_gtt(dev, index, pte);
+}
+
+static void
+agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[0], index * 4 + (512 * 1024), pte);
+	CTR2(KTR_AGP_I810, "965_pte %x %x", index, pte);
+}
+
+static void
+agp_g4x_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
+{
+	uint32_t pte;
+
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	if (flags == AGP_USER_CACHED_MEMORY)
+		pte |= I830_PTE_SYSTEM_CACHED;
+	pte |= (physical & 0x0000000f00000000ull) >> 28;
+	agp_g4x_write_gtt(dev, index, pte);
+}
+
+static void
+agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024), pte);
+	CTR2(KTR_AGP_I810, "g4x_pte %x %x", index, pte);
+}
+
+static void
+agp_sb_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
+    int flags)
+{
+	int type_mask, gfdt;
+	uint32_t pte;
+
+	pte = (u_int32_t)physical | I810_PTE_VALID;
+	type_mask = flags & ~AGP_USER_CACHED_MEMORY_GFDT;
+	gfdt = (flags & AGP_USER_CACHED_MEMORY_GFDT) != 0 ? GEN6_PTE_GFDT : 0;
+
+	if (type_mask == AGP_USER_MEMORY)
+		pte |= GEN6_PTE_UNCACHED;
+	else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC)
+		pte |= GEN6_PTE_LLC_MLC | gfdt;
+	else
+		pte |= GEN6_PTE_LLC | gfdt;
+
+	pte |= (physical & 0x000000ff00000000ull) >> 28;
+	agp_sb_write_gtt(dev, index, pte);
+}
+
+static void
+agp_sb_write_gtt(device_t dev, u_int index, uint32_t pte)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024), pte);
+	CTR2(KTR_AGP_I810, "sb_pte %x %x", index, pte);
 }
 
 static int
 agp_i810_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
+	u_int index;
 
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT)) {
-		device_printf(dev, "failed: offset is 0x%08jx, shift is %d, entries is %d\n", (intmax_t)offset, AGP_PAGE_SHIFT, sc->gatt->ag_entries);
-		return EINVAL;
+		device_printf(dev, "failed: offset is 0x%08jx, "
+		    "shift is %d, entries is %d\n", (intmax_t)offset,
+		    AGP_PAGE_SHIFT, sc->gatt->ag_entries);
+		return (EINVAL);
 	}
-
-	if ( sc->chiptype != CHIP_I810 ) {
-		if ( (offset >> AGP_PAGE_SHIFT) < sc->stolen ) {
-			device_printf(dev, "trying to bind into stolen memory");
-			return EINVAL;
-		}
+	index = offset >> AGP_PAGE_SHIFT;
+	if (sc->stolen != 0 && index < sc->stolen) {
+		device_printf(dev, "trying to bind into stolen memory\n");
+		return (EINVAL);
 	}
-
-	agp_i810_write_gtt_entry(dev, offset, physical, 1);
-
-	return 0;
+	sc->match->driver->install_gtt_pte(dev, index, physical, 0);
+	return (0);
 }
 
 static int
 agp_i810_unbind_page(device_t dev, vm_offset_t offset)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
+	u_int index;
 
+	sc = device_get_softc(dev);
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT))
-		return EINVAL;
+		return (EINVAL);
+	index = offset >> AGP_PAGE_SHIFT;
+	if (sc->stolen != 0 && index < sc->stolen) {
+		device_printf(dev, "trying to unbind from stolen memory\n");
+		return (EINVAL);
+	}
+	sc->match->driver->install_gtt_pte(dev, index, 0, 0);
+	return (0);
+}
 
-	if ( sc->chiptype != CHIP_I810 ) {
-		if ( (offset >> AGP_PAGE_SHIFT) < sc->stolen ) {
-			device_printf(dev, "trying to unbind from stolen memory");
-			return EINVAL;
-		}
-	}
+static u_int32_t
+agp_i810_read_gtt_pte(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
 
-	agp_i810_write_gtt_entry(dev, offset, 0, 0);
+	sc = device_get_softc(dev);
+	pte = bus_read_4(sc->sc_res[0], AGP_I810_GTT + index * 4);
+	return (pte);
+}
 
-	return 0;
+static u_int32_t
+agp_i915_read_gtt_pte(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+
+	sc = device_get_softc(dev);
+	pte = bus_read_4(sc->sc_res[1], index * 4);
+	return (pte);
+}
+
+static u_int32_t
+agp_i965_read_gtt_pte(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+
+	sc = device_get_softc(dev);
+	pte = bus_read_4(sc->sc_res[0], index * 4 + (512 * 1024));
+	return (pte);
+}
+
+static u_int32_t
+agp_g4x_read_gtt_pte(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+
+	sc = device_get_softc(dev);
+	pte = bus_read_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024));
+	return (pte);
+}
+
+static vm_paddr_t
+agp_i810_read_gtt_pte_paddr(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+	vm_paddr_t res;
+
+	sc = device_get_softc(dev);
+	pte = sc->match->driver->read_gtt_pte(dev, index);
+	res = pte & ~PAGE_MASK;
+	return (res);
+}
+
+static vm_paddr_t
+agp_i915_read_gtt_pte_paddr(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+	vm_paddr_t res;
+
+	sc = device_get_softc(dev);
+	pte = sc->match->driver->read_gtt_pte(dev, index);
+	res = (pte & ~PAGE_MASK) | ((pte & 0xf0) << 28);
+	return (res);
+}
+
+static vm_paddr_t
+agp_sb_read_gtt_pte_paddr(device_t dev, u_int index)
+{
+	struct agp_i810_softc *sc;
+	u_int32_t pte;
+	vm_paddr_t res;
+
+	sc = device_get_softc(dev);
+	pte = sc->match->driver->read_gtt_pte(dev, index);
+	res = (pte & ~PAGE_MASK) | ((pte & 0xff0) << 28);
+	return (res);
 }
 
 /*
@@ -909,29 +1903,30 @@
 agp_i810_enable(device_t dev, u_int32_t mode)
 {
 
-	return 0;
+	return (0);
 }
 
 static struct agp_memory *
 agp_i810_alloc_memory(device_t dev, int type, vm_size_t size)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
 	struct agp_memory *mem;
+	vm_page_t m;
 
-	if ((size & (AGP_PAGE_SIZE - 1)) != 0)
-		return 0;
+	sc = device_get_softc(dev);
 
-	if (sc->agp.as_allocated + size > sc->agp.as_maxmem)
-		return 0;
+	if ((size & (AGP_PAGE_SIZE - 1)) != 0 ||
+	    sc->agp.as_allocated + size > sc->agp.as_maxmem)
+		return (0);
 
 	if (type == 1) {
 		/*
 		 * Mapping local DRAM into GATT.
 		 */
-		if ( sc->chiptype != CHIP_I810 )
-			return 0;
+		if (sc->match->driver->chiptype != CHIP_I810)
+			return (0);
 		if (size != sc->dcache_size)
-			return 0;
+			return (0);
 	} else if (type == 2) {
 		/*
 		 * Type 2 is the contiguous physical memory type, that hands
@@ -942,13 +1937,13 @@
 		 */
 		if (size != AGP_PAGE_SIZE) {
 			if (sc->argb_cursor != NULL)
-				return 0;
+				return (0);
 
 			/* Allocate memory for ARGB cursor, if we can. */
 			sc->argb_cursor = contigmalloc(size, M_AGP,
 			   0, 0, ~0, PAGE_SIZE, 0);
 			if (sc->argb_cursor == NULL)
-				return 0;
+				return (0);
 		}
 	}
 
@@ -958,7 +1953,7 @@
 	mem->am_type = type;
 	if (type != 1 && (type != 2 || size == AGP_PAGE_SIZE))
 		mem->am_obj = vm_object_allocate(OBJT_DEFAULT,
-						 atop(round_page(size)));
+		    atop(round_page(size)));
 	else
 		mem->am_obj = 0;
 
@@ -968,8 +1963,6 @@
 			 * Allocate and wire down the page now so that we can
 			 * get its physical address.
 			 */
-			vm_page_t m;
-	
 			VM_OBJECT_LOCK(mem->am_obj);
 			m = vm_page_grab(mem->am_obj, 0, VM_ALLOC_NOBUSY |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
@@ -981,33 +1974,33 @@
 			 */
 			mem->am_physical = vtophys(sc->argb_cursor);
 		}
-	} else {
+	} else
 		mem->am_physical = 0;
-	}
 
 	mem->am_offset = 0;
 	mem->am_is_bound = 0;
 	TAILQ_INSERT_TAIL(&sc->agp.as_memory, mem, am_link);
 	sc->agp.as_allocated += size;
 
-	return mem;
+	return (mem);
 }
 
 static int
 agp_i810_free_memory(device_t dev, struct agp_memory *mem)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
+	vm_page_t m;
 
 	if (mem->am_is_bound)
-		return EBUSY;
+		return (EBUSY);
+
+	sc = device_get_softc(dev);
 
 	if (mem->am_type == 2) {
 		if (mem->am_size == AGP_PAGE_SIZE) {
 			/*
 			 * Unwire the page which we wired in alloc_memory.
 			 */
-			vm_page_t m;
-	
 			VM_OBJECT_LOCK(mem->am_obj);
 			m = vm_page_lookup(mem->am_obj, 0);
 			vm_page_lock(m);
@@ -1025,14 +2018,13 @@
 	if (mem->am_obj)
 		vm_object_deallocate(mem->am_obj);
 	free(mem, M_AGP);
-	return 0;
+	return (0);
 }
 
 static int
-agp_i810_bind_memory(device_t dev, struct agp_memory *mem,
-		     vm_offset_t offset)
+agp_i810_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
 	vm_offset_t i;
 
 	/* Do some sanity checks first. */
@@ -1040,76 +2032,78 @@
 	    offset + mem->am_size > AGP_GET_APERTURE(dev)) {
 		device_printf(dev, "binding memory at bad offset %#x\n",
 		    (int)offset);
-		return EINVAL;
+		return (EINVAL);
 	}
 
+	sc = device_get_softc(dev);
 	if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) {
 		mtx_lock(&sc->agp.as_lock);
 		if (mem->am_is_bound) {
 			mtx_unlock(&sc->agp.as_lock);
-			return EINVAL;
+			return (EINVAL);
 		}
 		/* The memory's already wired down, just stick it in the GTT. */
 		for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
-			agp_i810_write_gtt_entry(dev, offset + i,
-			    mem->am_physical + i, 1);
+			sc->match->driver->install_gtt_pte(dev, (offset + i) >>
+			    AGP_PAGE_SHIFT, mem->am_physical + i, 0);
 		}
 		agp_flush_cache();
 		mem->am_offset = offset;
 		mem->am_is_bound = 1;
 		mtx_unlock(&sc->agp.as_lock);
-		return 0;
+		return (0);
 	}
 
 	if (mem->am_type != 1)
-		return agp_generic_bind_memory(dev, mem, offset);
+		return (agp_generic_bind_memory(dev, mem, offset));
 
-	if ( sc->chiptype != CHIP_I810 )
-		return EINVAL;
-
-	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
+	/*
+	 * Mapping local DRAM into GATT.
+	 */
+	if (sc->match->driver->chiptype != CHIP_I810)
+		return (EINVAL);
+	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE)
 		bus_write_4(sc->sc_res[0],
 		    AGP_I810_GTT + (i >> AGP_PAGE_SHIFT) * 4, i | 3);
-	}
 
-	return 0;
+	return (0);
 }
 
 static int
 agp_i810_unbind_memory(device_t dev, struct agp_memory *mem)
 {
-	struct agp_i810_softc *sc = device_get_softc(dev);
+	struct agp_i810_softc *sc;
 	vm_offset_t i;
 
+	sc = device_get_softc(dev);
+
 	if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) {
 		mtx_lock(&sc->agp.as_lock);
 		if (!mem->am_is_bound) {
 			mtx_unlock(&sc->agp.as_lock);
-			return EINVAL;
+			return (EINVAL);
 		}
 
 		for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
-			agp_i810_write_gtt_entry(dev, mem->am_offset + i,
-			    0, 0);
+			sc->match->driver->install_gtt_pte(dev,
+			    (mem->am_offset + i) >> AGP_PAGE_SHIFT, 0, 0);
 		}
 		agp_flush_cache();
 		mem->am_is_bound = 0;
 		mtx_unlock(&sc->agp.as_lock);
-		return 0;
+		return (0);
 	}
 
 	if (mem->am_type != 1)
-		return agp_generic_unbind_memory(dev, mem);
+		return (agp_generic_unbind_memory(dev, mem));
 
-	if ( sc->chiptype != CHIP_I810 )
-		return EINVAL;
-
+	if (sc->match->driver->chiptype != CHIP_I810)
+		return (EINVAL);
 	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
-		bus_write_4(sc->sc_res[0],
-		    AGP_I810_GTT + (i >> AGP_PAGE_SHIFT) * 4, 0);
+		sc->match->driver->install_gtt_pte(dev, i >> AGP_PAGE_SHIFT,
+		    0, 0);
 	}
-
-	return 0;
+	return (0);
 }
 
 static device_method_t agp_i810_methods[] = {
@@ -1123,7 +2117,7 @@
 
 	/* AGP interface */
 	DEVMETHOD(agp_get_aperture,	agp_generic_get_aperture),
-	DEVMETHOD(agp_set_aperture,	agp_i810_set_aperture),
+	DEVMETHOD(agp_set_aperture,	agp_i810_method_set_aperture),
 	DEVMETHOD(agp_bind_page,	agp_i810_bind_page),
 	DEVMETHOD(agp_unbind_page,	agp_i810_unbind_page),
 	DEVMETHOD(agp_flush_tlb,	agp_i810_flush_tlb),
@@ -1132,6 +2126,7 @@
 	DEVMETHOD(agp_free_memory,	agp_i810_free_memory),
 	DEVMETHOD(agp_bind_memory,	agp_i810_bind_memory),
 	DEVMETHOD(agp_unbind_memory,	agp_i810_unbind_memory),
+	DEVMETHOD(agp_chipset_flush,	agp_intel_gtt_chipset_flush),
 
 	{ 0, 0 }
 };
@@ -1147,3 +2142,424 @@
 DRIVER_MODULE(agp_i810, vgapci, agp_i810_driver, agp_devclass, 0, 0);
 MODULE_DEPEND(agp_i810, agp, 1, 1, 1);
 MODULE_DEPEND(agp_i810, pci, 1, 1, 1);
+
+extern vm_page_t bogus_page;
+
+void
+agp_intel_gtt_clear_range(device_t dev, u_int first_entry, u_int num_entries)
+{
+	struct agp_i810_softc *sc;
+	u_int i;
+
+	sc = device_get_softc(dev);
+	for (i = 0; i < num_entries; i++)
+		sc->match->driver->install_gtt_pte(dev, first_entry + i,
+		    VM_PAGE_TO_PHYS(bogus_page), 0);
+	sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1);
+}
+
+void
+agp_intel_gtt_insert_pages(device_t dev, u_int first_entry, u_int num_entries,
+    vm_page_t *pages, u_int flags)
+{
+	struct agp_i810_softc *sc;
+	u_int i;
+
+	sc = device_get_softc(dev);
+	for (i = 0; i < num_entries; i++) {
+		MPASS(pages[i]->valid == VM_PAGE_BITS_ALL);
+		MPASS(pages[i]->wire_count > 0);
+		sc->match->driver->install_gtt_pte(dev, first_entry + i,
+		    VM_PAGE_TO_PHYS(pages[i]), flags);
+	}
+	sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1);
+}
+
+struct intel_gtt
+agp_intel_gtt_get(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	struct intel_gtt res;
+
+	sc = device_get_softc(dev);
+	res.stolen_size = sc->stolen_size;
+	res.gtt_total_entries = sc->gtt_total_entries;
+	res.gtt_mappable_entries = sc->gtt_mappable_entries;
+	res.do_idle_maps = 0;
+	res.scratch_page_dma = VM_PAGE_TO_PHYS(bogus_page);
+	return (res);
+}
+
+static int
+agp_i810_chipset_flush_setup(device_t dev)
+{
+
+	return (0);
+}
+
+static void
+agp_i810_chipset_flush_teardown(device_t dev)
+{
+
+	/* Nothing to do. */
+}
+
+static void
+agp_i810_chipset_flush(device_t dev)
+{
+
+	/* Nothing to do. */
+}
+
+static void
+agp_i830_chipset_flush(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t hic;
+	int i;
+
+	sc = device_get_softc(dev);
+	pmap_invalidate_cache();
+	hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC);
+	bus_write_4(sc->sc_res[0], AGP_I830_HIC, hic | (1 << 31));
+	for (i = 0; i < 20000 /* 1 sec */; i++) {
+		hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC);
+		if ((hic & (1 << 31)) != 0)
+			break;
+		DELAY(50);
+	}
+}
+
+static int
+agp_i915_chipset_flush_alloc_page(device_t dev, uint64_t start, uint64_t end)
+{
+	struct agp_i810_softc *sc;
+	device_t vga;
+
+	sc = device_get_softc(dev);
+	vga = device_get_parent(dev);
+	sc->sc_flush_page_rid = 100;
+	sc->sc_flush_page_res = BUS_ALLOC_RESOURCE(device_get_parent(vga), dev,
+	    SYS_RES_MEMORY, &sc->sc_flush_page_rid, start, end, PAGE_SIZE,
+	    RF_ACTIVE);
+	if (sc->sc_flush_page_res == NULL) {
+		device_printf(dev, "Failed to allocate flush page at 0x%jx\n",
+		    (uintmax_t)start);
+		return (EINVAL);
+	}
+	sc->sc_flush_page_vaddr = rman_get_virtual(sc->sc_flush_page_res);
+	if (bootverbose) {
+		device_printf(dev, "Allocated flush page phys 0x%jx virt %p\n",
+		    (uintmax_t)rman_get_start(sc->sc_flush_page_res),
+		    sc->sc_flush_page_vaddr);
+	}
+	return (0);
+}
+
+static void
+agp_i915_chipset_flush_free_page(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	device_t vga;
+
+	sc = device_get_softc(dev);
+	vga = device_get_parent(dev);
+	if (sc->sc_flush_page_res == NULL)
+		return;
+	BUS_DEACTIVATE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY,
+	    sc->sc_flush_page_rid, sc->sc_flush_page_res);
+	BUS_RELEASE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY,
+	    sc->sc_flush_page_rid, sc->sc_flush_page_res);
+}
+
+static int
+agp_i915_chipset_flush_setup(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t temp;
+	int error;
+
+	sc = device_get_softc(dev);
+	temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4);
+	if ((temp & 1) != 0) {
+		temp &= ~1;
+		if (bootverbose)
+			device_printf(dev,
+			    "Found already configured flush page at 0x%jx\n",
+			    (uintmax_t)temp);
+		sc->sc_bios_allocated_flush_page = 1;
+		/*
+		 * In the case BIOS initialized the flush pointer (?)
+		 * register, expect that BIOS also set up the resource
+		 * for the page.
+		 */
+		error = agp_i915_chipset_flush_alloc_page(dev, temp,
+		    temp + PAGE_SIZE - 1);
+		if (error != 0)
+			return (error);
+	} else {
+		sc->sc_bios_allocated_flush_page = 0;
+		error = agp_i915_chipset_flush_alloc_page(dev, 0, 0xffffffff);
+		if (error != 0)
+			return (error);
+		temp = rman_get_start(sc->sc_flush_page_res);
+		pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp | 1, 4);
+	}
+	return (0);
+}
+
+static void
+agp_i915_chipset_flush_teardown(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t temp;
+
+	sc = device_get_softc(dev);
+	if (sc->sc_flush_page_res == NULL)
+		return;
+	if (!sc->sc_bios_allocated_flush_page) {
+		temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4);
+		temp &= ~1;
+		pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp, 4);
+	}		
+	agp_i915_chipset_flush_free_page(dev);
+}
+
+static int
+agp_i965_chipset_flush_setup(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint64_t temp;
+	uint32_t temp_hi, temp_lo;
+	int error;
+
+	sc = device_get_softc(dev);
+
+	temp_hi = pci_read_config(sc->bdev, AGP_I965_IFPADDR + 4, 4);
+	temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4);
+
+	if ((temp_lo & 1) != 0) {
+		temp = ((uint64_t)temp_hi << 32) | (temp_lo & ~1);
+		if (bootverbose)
+			device_printf(dev,
+			    "Found already configured flush page at 0x%jx\n",
+			    (uintmax_t)temp);
+		sc->sc_bios_allocated_flush_page = 1;
+		/*
+		 * In the case BIOS initialized the flush pointer (?)
+		 * register, expect that BIOS also set up the resource
+		 * for the page.
+		 */
+		error = agp_i915_chipset_flush_alloc_page(dev, temp,
+		    temp + PAGE_SIZE - 1);
+		if (error != 0)
+			return (error);
+	} else {
+		sc->sc_bios_allocated_flush_page = 0;
+		error = agp_i915_chipset_flush_alloc_page(dev, 0, ~0);
+		if (error != 0)
+			return (error);
+		temp = rman_get_start(sc->sc_flush_page_res);
+		pci_write_config(sc->bdev, AGP_I965_IFPADDR + 4,
+		    (temp >> 32) & UINT32_MAX, 4);
+		pci_write_config(sc->bdev, AGP_I965_IFPADDR,
+		    (temp & UINT32_MAX) | 1, 4);
+	}
+	return (0);
+}
+
+static void
+agp_i965_chipset_flush_teardown(device_t dev)
+{
+	struct agp_i810_softc *sc;
+	uint32_t temp_lo;
+
+	sc = device_get_softc(dev);
+	if (sc->sc_flush_page_res == NULL)
+		return;
+	if (!sc->sc_bios_allocated_flush_page) {
+		temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4);
+		temp_lo &= ~1;
+		pci_write_config(sc->bdev, AGP_I965_IFPADDR, temp_lo, 4);
+	}
+	agp_i915_chipset_flush_free_page(dev);
+}
+
+static void
+agp_i915_chipset_flush(device_t dev)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	*(uint32_t *)sc->sc_flush_page_vaddr = 1;
+}
+
+int
+agp_intel_gtt_chipset_flush(device_t dev)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(dev);
+	sc->match->driver->chipset_flush(dev);
+	return (0);
+}
+
+void
+agp_intel_gtt_unmap_memory(device_t dev, struct sglist *sg_list)
+{
+}
+
+int
+agp_intel_gtt_map_memory(device_t dev, vm_page_t *pages, u_int num_entries,
+    struct sglist **sg_list)
+{
+	struct agp_i810_softc *sc;
+	struct sglist *sg;
+	int i;
+#if 0
+	int error;
+	bus_dma_tag_t dmat;
+#endif
+
+	if (*sg_list != NULL)
+		return (0);
+	sc = device_get_softc(dev);
+	sg = sglist_alloc(num_entries, M_WAITOK /* XXXKIB */);
+	for (i = 0; i < num_entries; i++) {
+		sg->sg_segs[i].ss_paddr = VM_PAGE_TO_PHYS(pages[i]);
+		sg->sg_segs[i].ss_len = PAGE_SIZE;
+	}
+
+#if 0
+	error = bus_dma_tag_create(bus_get_dma_tag(dev),
+	    1 /* alignment */, 0 /* boundary */,
+	    1ULL << sc->match->busdma_addr_mask_sz /* lowaddr */,
+	    BUS_SPACE_MAXADDR /* highaddr */,
+            NULL /* filtfunc */, NULL /* filtfuncarg */,
+	    BUS_SPACE_MAXADDR /* maxsize */,
+	    BUS_SPACE_UNRESTRICTED /* nsegments */,
+	    BUS_SPACE_MAXADDR /* maxsegsz */,
+	    0 /* flags */, NULL /* lockfunc */, NULL /* lockfuncarg */,
+	    &dmat);
+	if (error != 0) {
+		sglist_free(sg);
+		return (error);
+	}
+	/* XXXKIB */
+#endif
+	*sg_list = sg;
+	return (0);
+}
+
+void
+agp_intel_gtt_insert_sg_entries(device_t dev, struct sglist *sg_list,
+    u_int first_entry, u_int flags)
+{
+	struct agp_i810_softc *sc;
+	vm_paddr_t spaddr;
+	size_t slen;
+	u_int i, j;
+
+	sc = device_get_softc(dev);
+	for (i = j = 0; j < sg_list->sg_nseg; j++) {
+		spaddr = sg_list->sg_segs[i].ss_paddr;
+		slen = sg_list->sg_segs[i].ss_len;
+		for (; slen > 0; i++) {
+			sc->match->driver->install_gtt_pte(dev, first_entry + i,
+			    spaddr, flags);
+			spaddr += AGP_PAGE_SIZE;
+			slen -= AGP_PAGE_SIZE;
+		}
+	}
+	sc->match->driver->read_gtt_pte(dev, first_entry + i - 1);
+}
+
+void
+intel_gtt_clear_range(u_int first_entry, u_int num_entries)
+{
+
+	agp_intel_gtt_clear_range(intel_agp, first_entry, num_entries);
+}
+
+void
+intel_gtt_insert_pages(u_int first_entry, u_int num_entries, vm_page_t *pages,
+    u_int flags)
+{
+
+	agp_intel_gtt_insert_pages(intel_agp, first_entry, num_entries,
+	    pages, flags);
+}
+
+struct intel_gtt
+intel_gtt_get(void)
+{
+
+	return (agp_intel_gtt_get(intel_agp));
+}
+
+int
+intel_gtt_chipset_flush(void)
+{
+
+	return (agp_intel_gtt_chipset_flush(intel_agp));
+}
+
+void
+intel_gtt_unmap_memory(struct sglist *sg_list)
+{
+
+	agp_intel_gtt_unmap_memory(intel_agp, sg_list);
+}
+
+int
+intel_gtt_map_memory(vm_page_t *pages, u_int num_entries,
+    struct sglist **sg_list)
+{
+
+	return (agp_intel_gtt_map_memory(intel_agp, pages, num_entries,
+	    sg_list));
+}
+
+void
+intel_gtt_insert_sg_entries(struct sglist *sg_list, u_int first_entry,
+    u_int flags)
+{
+
+	agp_intel_gtt_insert_sg_entries(intel_agp, sg_list, first_entry, flags);
+}
+
+device_t
+intel_gtt_get_bridge_device(void)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(intel_agp);
+	return (sc->bdev);
+}
+
+vm_paddr_t
+intel_gtt_read_pte_paddr(u_int entry)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(intel_agp);
+	return (sc->match->driver->read_gtt_pte_paddr(intel_agp, entry));
+}
+
+u_int32_t
+intel_gtt_read_pte(u_int entry)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(intel_agp);
+	return (sc->match->driver->read_gtt_pte(intel_agp, entry));
+}
+
+void
+intel_gtt_write(u_int entry, uint32_t val)
+{
+	struct agp_i810_softc *sc;
+
+	sc = device_get_softc(intel_agp);
+	return (sc->match->driver->write_gtt(intel_agp, entry, val));
+}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agp_i810.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/dev/agp/agp_i810.h	Wed Jul 25 17:04:43 2012 +0300
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/agp/agp_i810.h 235782 2012-05-22 10:59:26Z kib $
+ */
+
+#ifndef AGP_AGP_I810_H
+#define	AGP_AGP_I810_H
+
+#include <sys/param.h>
+#include <sys/sglist.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+/* Special gtt memory types */
+#define AGP_DCACHE_MEMORY	1
+#define AGP_PHYS_MEMORY		2
+
+/* New caching attributes for gen6/sandybridge */
+#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
+#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
+
+/* flag for GFDT type */
+#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
+
+struct intel_gtt {
+	/* Size of memory reserved for graphics by the BIOS */
+	u_int stolen_size;
+	/* Total number of gtt entries. */
+	u_int gtt_total_entries;
+	/*
+	 * Part of the gtt that is mappable by the cpu, for those
+	 * chips where this is not the full gtt.
+	 */
+	u_int gtt_mappable_entries;
+
+	/*
+	 * Always false.
+	 */
+	u_int do_idle_maps;
+	
+	/*
+	 * Share the scratch page dma with ppgtts.
+	 */
+	vm_paddr_t scratch_page_dma;
+};
+
+struct intel_gtt agp_intel_gtt_get(device_t dev);
+int agp_intel_gtt_chipset_flush(device_t dev);
+void agp_intel_gtt_unmap_memory(device_t dev, struct sglist *sg_list);
+void agp_intel_gtt_clear_range(device_t dev, u_int first_entry,
+    u_int num_entries);
+int agp_intel_gtt_map_memory(device_t dev, vm_page_t *pages, u_int num_entries,
+    struct sglist **sg_list);
+void agp_intel_gtt_insert_sg_entries(device_t dev, struct sglist *sg_list,
+    u_int pg_start, u_int flags);
+void agp_intel_gtt_insert_pages(device_t dev, u_int first_entry,
+    u_int num_entries, vm_page_t *pages, u_int flags);
+
+struct intel_gtt intel_gtt_get(void);
+int intel_gtt_chipset_flush(void);
+void intel_gtt_unmap_memory(struct sglist *sg_list);
+void intel_gtt_clear_range(u_int first_entry, u_int num_entries);
+int intel_gtt_map_memory(vm_page_t *pages, u_int num_entries,
+    struct sglist **sg_list);
+void intel_gtt_insert_sg_entries(struct sglist *sg_list, u_int pg_start,
+    u_int flags);
+void intel_gtt_insert_pages(u_int first_entry, u_int num_entries,
+    vm_page_t *pages, u_int flags);
+vm_paddr_t intel_gtt_read_pte_paddr(u_int entry);
+u_int32_t intel_gtt_read_pte(u_int entry);
+device_t intel_gtt_get_bridge_device(void);
+void intel_gtt_write(u_int entry, uint32_t val);
+
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agp_if.m
--- a/head/sys/dev/agp/agp_if.m	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agp_if.m	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
-# $FreeBSD$
+# $FreeBSD: head/sys/dev/agp/agp_if.m 235782 2012-05-22 10:59:26Z kib $
 #
 
 #include <sys/bus.h>
@@ -36,6 +36,14 @@
 #
 INTERFACE agp;
 
+CODE {
+	static int
+	null_agp_chipset_flush(device_t dev)
+	{
+		return (ENXIO);
+	}
+};
+
 #
 # Return the current aperture size.
 #
@@ -132,3 +140,7 @@
 	device_t	dev;
 	struct agp_memory *handle;
 };
+
+METHOD int chipset_flush {
+	device_t	dev;
+} DEFAULT null_agp_chipset_flush;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agppriv.h
--- a/head/sys/dev/agp/agppriv.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agppriv.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$FreeBSD$
+ *	$FreeBSD: head/sys/dev/agp/agppriv.h 235782 2012-05-22 10:59:26Z kib $
  */
 
 #ifndef _PCI_AGPPRIV_H_
@@ -73,7 +73,7 @@
 	struct agp_memory_list	as_memory;	/* list of allocated memory */
 	int			as_nextid;	/* next memory block id */
 	int			as_isopen;	/* user device is open */
-	struct cdev *as_devnode;	/* from make_dev */
+	struct cdev		*as_devnode;	/* from make_dev */
 	struct mtx		as_lock;	/* lock for access to GATT */
 };
 
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agpreg.h
--- a/head/sys/dev/agp/agpreg.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agpreg.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$FreeBSD$
+ *	$FreeBSD: head/sys/dev/agp/agpreg.h 235782 2012-05-22 10:59:26Z kib $
  */
 
 #ifndef _PCI_AGPREG_H_
@@ -176,10 +176,33 @@
 #define AGP_I810_GMADR		0x10
 #define AGP_I810_MMADR		0x14
 
+#define	I810_PTE_VALID		0x00000001
+
+/*
+ * Cache control
+ *
+ * Pre-Sandybridge bits
+ */
+#define	I810_PTE_MAIN_UNCACHED	0x00000000
+#define	I810_PTE_LOCAL		0x00000002	/* Non-snooped main phys memory */
+#define	I830_PTE_SYSTEM_CACHED  0x00000006	/* Snooped main phys memory */
+
+/*
+ * Sandybridge
+ * LLC - Last Level Cache
+ * MMC - Mid Level Cache
+ */
+#define	GEN6_PTE_RESERVED	0x00000000
+#define	GEN6_PTE_UNCACHED	0x00000002	/* Do not cache */
+#define	GEN6_PTE_LLC		0x00000004	/* Cache in LLC */
+#define	GEN6_PTE_LLC_MLC	0x00000006	/* Cache in LLC and MLC */
+#define	GEN6_PTE_GFDT		0x00000008	/* Graphics Data Type */
+
 /*
  * Memory mapped register offsets for i810 chipset.
  */
 #define AGP_I810_PGTBL_CTL	0x2020
+#define	AGP_I810_PGTBL_ENABLED	0x00000001
 /**
  * This field determines the actual size of the global GTT on the 965
  * and G33
@@ -187,7 +210,23 @@
 #define AGP_I810_PGTBL_SIZE_MASK	0x0000000e
 #define AGP_I810_PGTBL_SIZE_512KB	(0 << 1)
 #define AGP_I810_PGTBL_SIZE_256KB	(1 << 1)
-#define AGP_I810_PGTBL_SIZE_128KB	(2 << 1)
+#define	AGP_I810_PGTBL_SIZE_128KB	(2 << 1)
+#define	AGP_I810_PGTBL_SIZE_1MB		(3 << 1)
+#define	AGP_I810_PGTBL_SIZE_2MB		(4 << 1)
+#define	AGP_I810_PGTBL_SIZE_1_5MB	(5 << 1)
+#define AGP_G33_GCC1_SIZE_MASK		(3 << 8)
+#define AGP_G33_GCC1_SIZE_1M		(1 << 8)
+#define AGP_G33_GCC1_SIZE_2M		(2 << 8)
+#define AGP_G4x_GCC1_SIZE_MASK		(0xf << 8)
+#define AGP_G4x_GCC1_SIZE_1M		(0x1 << 8)
+#define AGP_G4x_GCC1_SIZE_2M		(0x3 << 8)
+#define AGP_G4x_GCC1_SIZE_VT_EN		(0x8 << 8)
+#define AGP_G4x_GCC1_SIZE_VT_1M \
+    (AGP_G4x_GCC1_SIZE_1M | AGP_G4x_GCC1_SIZE_VT_EN)
+#define AGP_G4x_GCC1_SIZE_VT_1_5M	((0x2 << 8) | AGP_G4x_GCC1_SIZE_VT_EN)
+#define AGP_G4x_GCC1_SIZE_VT_2M	\
+    (AGP_G4x_GCC1_SIZE_2M | AGP_G4x_GCC1_SIZE_VT_EN)
+
 #define AGP_I810_DRT		0x3000
 #define AGP_I810_DRT_UNPOPULATED 0x00
 #define AGP_I810_DRT_POPULATED	0x01
@@ -207,6 +246,7 @@
 #define AGP_I830_GCC1_GMASIZE		0x01
 #define AGP_I830_GCC1_GMASIZE_64	0x01
 #define AGP_I830_GCC1_GMASIZE_128	0x00
+#define	AGP_I830_HIC			0x70
 
 /*
  * Config registers for 852GM/855GM/865G device 0
@@ -243,6 +283,9 @@
 #define AGP_I915_GCC1_GMS_STOLEN_48M	0x60
 #define AGP_I915_GCC1_GMS_STOLEN_64M	0x70
 #define AGP_I915_DEVEN			0x54
+#define	AGP_SB_DEVEN_D2EN		0x10	/* SB+ has IGD enabled bit */
+#define	AGP_SB_DEVEN_D2EN_ENABLED	0x10	/* in different place */
+#define	AGP_SB_DEVEN_D2EN_DISABLED	0x00
 #define AGP_I915_DEVEN_D2F0		0x08
 #define AGP_I915_DEVEN_D2F0_ENABLED	0x08
 #define AGP_I915_DEVEN_D2F0_DISABLED	0x00
@@ -250,6 +293,7 @@
 #define AGP_I915_MSAC_GMASIZE		0x02
 #define AGP_I915_MSAC_GMASIZE_128	0x02
 #define AGP_I915_MSAC_GMASIZE_256	0x00
+#define	AGP_I915_IFPADDR		0x60
 
 /*
  * G965 registers
@@ -262,6 +306,8 @@
 #define AGP_I965_PGTBL_SIZE_1MB		(3 << 1)
 #define AGP_I965_PGTBL_SIZE_2MB		(4 << 1)
 #define AGP_I965_PGTBL_SIZE_1_5MB	(5 << 1)
+#define AGP_I965_PGTBL_CTL2		0x20c4
+#define	AGP_I965_IFPADDR		0x70
 
 /*
  * G33 registers
@@ -275,12 +321,43 @@
 /*
  * G4X registers
  */
+#define AGP_G4X_GMADR			0x20
+#define AGP_G4X_MMADR			0x10
+#define AGP_G4X_GTTADR			0x18
 #define AGP_G4X_GCC1_GMS_STOLEN_96M	0xa0
 #define AGP_G4X_GCC1_GMS_STOLEN_160M	0xb0
 #define AGP_G4X_GCC1_GMS_STOLEN_224M	0xc0
 #define AGP_G4X_GCC1_GMS_STOLEN_352M	0xd0
 
 /*
+ * SandyBridge/IvyBridge registers
+ */
+#define AGP_SNB_GCC1			0x50
+#define AGP_SNB_GMCH_GMS_STOLEN_MASK	0xF8
+#define AGP_SNB_GMCH_GMS_STOLEN_32M	(1 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_64M	(2 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_96M	(3 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_128M	(4 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_160M	(5 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_192M	(6 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_224M	(7 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_256M	(8 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_288M	(9 << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_320M	(0xa << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_352M	(0xb << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_384M	(0xc << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_416M	(0xd << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_448M	(0xe << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_480M	(0xf << 3)
+#define AGP_SNB_GMCH_GMS_STOLEN_512M	(0x10 << 3)
+#define AGP_SNB_GTT_SIZE_0M		(0 << 8)
+#define AGP_SNB_GTT_SIZE_1M		(1 << 8)
+#define AGP_SNB_GTT_SIZE_2M		(2 << 8)
+#define AGP_SNB_GTT_SIZE_MASK		(3 << 8)
+
+#define AGP_SNB_GFX_MODE		0x02520
+
+/*
  * NVIDIA nForce/nForce2 registers
  */
 #define	AGP_NVIDIA_0_APBASE		0x10
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/agp/agpvar.h
--- a/head/sys/dev/agp/agpvar.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/agp/agpvar.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$FreeBSD$
+ *	$FreeBSD: head/sys/dev/agp/agpvar.h 235782 2012-05-22 10:59:26Z kib $
  */
 
 #ifndef _PCI_AGPVAR_H_
@@ -122,4 +122,10 @@
  */
 void agp_memory_info(device_t dev, void *handle, struct agp_memory_info *mi);
 
+#define AGP_NORMAL_MEMORY 0
+
+#define AGP_USER_TYPES (1 << 16)
+#define AGP_USER_MEMORY (AGP_USER_TYPES)
+#define AGP_USER_CACHED_MEMORY (AGP_USER_TYPES + 1)
+
 #endif /* !_PCI_AGPVAR_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ahci/ahci.c
--- a/head/sys/dev/ahci/ahci.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ahci/ahci.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ahci/ahci.c 232380 2012-03-02 08:49:07Z mav $");
+__FBSDID("$FreeBSD: head/sys/dev/ahci/ahci.c 236847 2012-06-10 11:17:14Z mav $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -200,8 +200,12 @@
 	{0x91231b4b, 0x00, "Marvell 88SE912x",	AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES},
 	{0x91251b4b, 0x00, "Marvell 88SE9125",	AHCI_Q_NOBSYRES},
 	{0x91281b4b, 0x00, "Marvell 88SE9128",	AHCI_Q_NOBSYRES|AHCI_Q_ALTSIG},
+	{0x91301b4b, 0x00, "Marvell 88SE9130",  AHCI_Q_NOBSYRES|AHCI_Q_ALTSIG},
 	{0x91721b4b, 0x00, "Marvell 88SE9172",	AHCI_Q_NOBSYRES},
 	{0x91821b4b, 0x00, "Marvell 88SE9182",	AHCI_Q_NOBSYRES},
+	{0x92201b4b, 0x00, "Marvell 88SE9220",  AHCI_Q_NOBSYRES|AHCI_Q_ALTSIG},
+	{0x92301b4b, 0x00, "Marvell 88SE9230",  AHCI_Q_NOBSYRES|AHCI_Q_ALTSIG},
+	{0x92351b4b, 0x00, "Marvell 88SE9235",  AHCI_Q_NOBSYRES},
 	{0x06201103, 0x00, "HighPoint RocketRAID 620",	AHCI_Q_NOBSYRES},
 	{0x06201b4b, 0x00, "HighPoint RocketRAID 620",	AHCI_Q_NOBSYRES},
 	{0x06221103, 0x00, "HighPoint RocketRAID 622",	AHCI_Q_NOBSYRES},
@@ -1465,7 +1469,9 @@
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	mtx_lock(&ch->mtx);
+	xpt_batch_start(ch->sim);
 	ahci_ch_intr(data);
+	xpt_batch_done(ch->sim);
 	mtx_unlock(&ch->mtx);
 }
 
@@ -2890,7 +2896,7 @@
 			d = &ch->curr[ccb->ccb_h.target_id];
 		else
 			d = &ch->user[ccb->ccb_h.target_id];
-		cts->protocol = PROTO_ATA;
+		cts->protocol = PROTO_UNSPECIFIED;
 		cts->protocol_version = PROTO_VERSION_UNSPECIFIED;
 		cts->transport = XPORT_SATA;
 		cts->transport_version = XPORT_VERSION_UNSPECIFIED;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aic79xx.c
--- a/head/sys/dev/aic7xxx/aic79xx.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aic79xx.c	Wed Jul 25 17:04:43 2012 +0300
@@ -46,7 +46,7 @@
 #include "aicasm/aicasm_insformat.h"
 #else
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/aic7xxx/aic79xx.c 237601 2012-06-26 14:51:35Z ken $");
 #include <dev/aic7xxx/aic79xx_osm.h>
 #include <dev/aic7xxx/aic79xx_inline.h>
 #include <dev/aic7xxx/aicasm/aicasm_insformat.h>
@@ -8561,7 +8561,7 @@
 ahd_send_lstate_events(struct ahd_softc *ahd, struct ahd_tmode_lstate *lstate)
 {
 	struct ccb_hdr *ccbh;
-	struct ccb_immed_notify *inot;
+	struct ccb_immediate_notify *inot;
 
 	while (lstate->event_r_idx != lstate->event_w_idx
 	    && (ccbh = SLIST_FIRST(&lstate->immed_notifies)) != NULL) {
@@ -8569,19 +8569,18 @@
 
 		event = &lstate->event_buffer[lstate->event_r_idx];
 		SLIST_REMOVE_HEAD(&lstate->immed_notifies, sim_links.sle);
-		inot = (struct ccb_immed_notify *)ccbh;
+		inot = (struct ccb_immediate_notify *)ccbh;
 		switch (event->event_type) {
 		case EVENT_TYPE_BUS_RESET:
 			ccbh->status = CAM_SCSI_BUS_RESET|CAM_DEV_QFRZN;
 			break;
 		default:
 			ccbh->status = CAM_MESSAGE_RECV|CAM_DEV_QFRZN;
-			inot->message_args[0] = event->event_type;
-			inot->message_args[1] = event->event_arg;
+			inot->arg = event->event_type;
+			inot->seq_id = event->event_arg;
 			break;
 		}
 		inot->initiator_id = event->initiator_id;
-		inot->sense_len = 0;
 		xpt_done((union ccb *)inot);
 		lstate->event_r_idx++;
 		if (lstate->event_r_idx == AHD_TMODE_EVENT_BUFFER_SIZE)
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aic79xx_osm.c
--- a/head/sys/dev/aic7xxx/aic79xx_osm.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aic79xx_osm.c	Wed Jul 25 17:04:43 2012 +0300
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/aic7xxx/aic79xx_osm.c 237601 2012-06-26 14:51:35Z ken $");
 
 #include <dev/aic7xxx/aic79xx_osm.h>
 #include <dev/aic7xxx/aic79xx_inline.h>
@@ -601,8 +601,8 @@
 		break;
 	}
 #ifdef AHD_TARGET_MODE
-	case XPT_NOTIFY_ACK:
-	case XPT_IMMED_NOTIFY:
+	case XPT_NOTIFY_ACKNOWLEDGE:
+	case XPT_IMMEDIATE_NOTIFY:
 	{
 		struct	   ahd_tmode_tstate *tstate;
 		struct	   ahd_tmode_lstate *lstate;
@@ -1189,7 +1189,7 @@
 	switch (abort_ccb->ccb_h.func_code) {
 #ifdef AHD_TARGET_MODE
 	case XPT_ACCEPT_TARGET_IO:
-	case XPT_IMMED_NOTIFY:
+	case XPT_IMMEDIATE_NOTIFY:
 	case XPT_CONT_TARGET_IO:
 	{
 		struct ahd_tmode_tstate *tstate;
@@ -1207,7 +1207,7 @@
 
 		if (abort_ccb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO)
 			list = &lstate->accept_tios;
-		else if (abort_ccb->ccb_h.func_code == XPT_IMMED_NOTIFY)
+		else if (abort_ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY)
 			list = &lstate->immed_notifies;
 		else
 			list = NULL;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aic7xxx.c
--- a/head/sys/dev/aic7xxx/aic7xxx.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aic7xxx.c	Wed Jul 25 17:04:43 2012 +0300
@@ -46,7 +46,7 @@
 #include "aicasm/aicasm_insformat.h"
 #else
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/aic7xxx/aic7xxx.c 237601 2012-06-26 14:51:35Z ken $");
 #include <dev/aic7xxx/aic7xxx_osm.h>
 #include <dev/aic7xxx/aic7xxx_inline.h>
 #include <dev/aic7xxx/aicasm/aicasm_insformat.h>
@@ -6368,7 +6368,7 @@
 ahc_send_lstate_events(struct ahc_softc *ahc, struct ahc_tmode_lstate *lstate)
 {
 	struct ccb_hdr *ccbh;
-	struct ccb_immed_notify *inot;
+	struct ccb_immediate_notify *inot;
 
 	while (lstate->event_r_idx != lstate->event_w_idx
 	    && (ccbh = SLIST_FIRST(&lstate->immed_notifies)) != NULL) {
@@ -6376,19 +6376,18 @@
 
 		event = &lstate->event_buffer[lstate->event_r_idx];
 		SLIST_REMOVE_HEAD(&lstate->immed_notifies, sim_links.sle);
-		inot = (struct ccb_immed_notify *)ccbh;
+		inot = (struct ccb_immediate_notify *)ccbh;
 		switch (event->event_type) {
 		case EVENT_TYPE_BUS_RESET:
 			ccbh->status = CAM_SCSI_BUS_RESET|CAM_DEV_QFRZN;
 			break;
 		default:
 			ccbh->status = CAM_MESSAGE_RECV|CAM_DEV_QFRZN;
-			inot->message_args[0] = event->event_type;
-			inot->message_args[1] = event->event_arg;
+			inot->arg = event->event_type;
+			inot->seq_id = event->event_arg;
 			break;
 		}
 		inot->initiator_id = event->initiator_id;
-		inot->sense_len = 0;
 		xpt_done((union ccb *)inot);
 		lstate->event_r_idx++;
 		if (lstate->event_r_idx == AHC_TMODE_EVENT_BUFFER_SIZE)
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aic7xxx_osm.c
--- a/head/sys/dev/aic7xxx/aic7xxx_osm.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aic7xxx_osm.c	Wed Jul 25 17:04:43 2012 +0300
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/aic7xxx/aic7xxx_osm.c 237601 2012-06-26 14:51:35Z ken $");
 
 #include <dev/aic7xxx/aic7xxx_osm.h>
 #include <dev/aic7xxx/aic7xxx_inline.h>
@@ -568,8 +568,8 @@
 		}
 		break;
 	}
-	case XPT_NOTIFY_ACK:
-	case XPT_IMMED_NOTIFY:
+	case XPT_NOTIFY_ACKNOWLEDGE:
+	case XPT_IMMEDIATE_NOTIFY:
 	{
 		struct	   ahc_tmode_tstate *tstate;
 		struct	   ahc_tmode_lstate *lstate;
@@ -1248,7 +1248,7 @@
 	abort_ccb = ccb->cab.abort_ccb;
 	switch (abort_ccb->ccb_h.func_code) {
 	case XPT_ACCEPT_TARGET_IO:
-	case XPT_IMMED_NOTIFY:
+	case XPT_IMMEDIATE_NOTIFY:
 	case XPT_CONT_TARGET_IO:
 	{
 		struct ahc_tmode_tstate *tstate;
@@ -1266,7 +1266,7 @@
 
 		if (abort_ccb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO)
 			list = &lstate->accept_tios;
-		else if (abort_ccb->ccb_h.func_code == XPT_IMMED_NOTIFY)
+		else if (abort_ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY)
 			list = &lstate->immed_notifies;
 		else
 			list = NULL;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aicasm/Makefile
--- a/head/sys/dev/aic7xxx/aicasm/Makefile	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aicasm/Makefile	Wed Jul 25 17:04:43 2012 +0300
@@ -1,7 +1,7 @@
 #
 # $Id: //depot/aic7xxx/freebsd/dev/aic7xxx/aicasm/Makefile#2 $
 #
-# $FreeBSD: head/sys/dev/aic7xxx/aicasm/Makefile 220863 2011-04-19 18:09:21Z dim $
+# $FreeBSD: head/sys/dev/aic7xxx/aicasm/Makefile 236578 2012-06-04 20:36:11Z dim $
 
 PROG=	aicasm
 
@@ -24,8 +24,7 @@
 DEPENDFILE=	.depend_aicasm
 .endif
 
-NOSTDINC=	-nostdinc
-CFLAGS+= ${NOSTDINC} -I/usr/include -I.
+CFLAGS+= -I.
 .ifdef MAKESRCPATH
 CFLAGS+= -I${MAKESRCPATH}
 .endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/aic7xxx/aicasm/aicasm.c
--- a/head/sys/dev/aic7xxx/aicasm/aicasm.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/aic7xxx/aicasm/aicasm.c	Wed Jul 25 17:04:43 2012 +0300
@@ -39,7 +39,7 @@
  *
  * $Id: //depot/aic7xxx/aic7xxx/aicasm/aicasm.c#23 $
  *
- * $FreeBSD: head/sys/dev/aic7xxx/aicasm/aicasm.c 224046 2011-07-15 00:36:47Z emaste $
+ * $FreeBSD: head/sys/dev/aic7xxx/aicasm/aicasm.c 236571 2012-06-04 17:22:43Z dim $
  */
 #include <sys/types.h>
 #include <sys/mman.h>
@@ -530,7 +530,7 @@
 	int instrptr;
 	unsigned int line;
 	int func_count;
-	int skip_addr;
+	unsigned int skip_addr;
 
 	instrcount = 0;
 	instrptr = 0;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ata/ata-all.c
--- a/head/sys/dev/ata/ata-all.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ata/ata-all.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ata/ata-all.c 233282 2012-03-21 16:59:39Z marius $");
+__FBSDID("$FreeBSD: head/sys/dev/ata/ata-all.c 238666 2012-07-21 08:19:43Z mav $");
 
 #include "opt_ata.h"
 #include <sys/param.h>
@@ -544,9 +544,11 @@
     struct ata_channel *ch = (struct ata_channel *)data;
 
     mtx_lock(&ch->state_mtx);
+    xpt_batch_start(ch->sim);
 #endif
     ata_interrupt_locked(data);
 #ifdef ATA_CAM
+    xpt_batch_done(ch->sim);
     mtx_unlock(&ch->state_mtx);
 #endif
 }
@@ -885,7 +887,7 @@
 {
     device_t child;
 
-    if ((child = device_add_child(parent, NULL, unit))) {
+    if ((child = device_add_child(parent, (unit < 0) ? NULL : "ad", unit))) {
 	device_set_softc(child, atadev);
 	device_quiet(child);
 	atadev->dev = child;
@@ -1530,7 +1532,7 @@
 
 	ch->requestsense = 1;
 
-	bzero(request, sizeof(&request));
+	bzero(request, sizeof(*request));
 	request->dev = NULL;
 	request->parent = dev;
 	request->unit = ccb->ccb_h.target_id;
@@ -1785,7 +1787,7 @@
 			d = &ch->curr[ccb->ccb_h.target_id];
 		else
 			d = &ch->user[ccb->ccb_h.target_id];
-		cts->protocol = PROTO_ATA;
+		cts->protocol = PROTO_UNSPECIFIED;
 		cts->protocol_version = PROTO_VERSION_UNSPECIFIED;
 		if (ch->flags & ATA_SATA) {
 			cts->transport = XPORT_SATA;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ata/ata-lowlevel.c
--- a/head/sys/dev/ata/ata-lowlevel.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ata/ata-lowlevel.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ata/ata-lowlevel.c 230132 2012-01-15 13:23:18Z uqs $");
+__FBSDID("$FreeBSD: head/sys/dev/ata/ata-lowlevel.c 238673 2012-07-21 14:59:43Z mav $");
 
 #include "opt_ata.h"
 #include <sys/param.h>
@@ -836,23 +836,21 @@
 ata_pio_read(struct ata_request *request, int length)
 {
     struct ata_channel *ch = device_get_softc(request->parent);
+    uint8_t *addr;
     int size = min(request->transfersize, length);
     int resid;
     uint8_t buf[2];
 
-    if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t))) {
-	ATA_IDX_INSW_STRM(ch, ATA_DATA,
-			  (void*)((uintptr_t)request->data+request->donecount),
-			  size / sizeof(int16_t));
+    addr = (uint8_t *)request->data + request->donecount;
+    if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t)) ||
+	((uintptr_t)addr % sizeof(int32_t))) {
+	ATA_IDX_INSW_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int16_t));
 	if (size & 1) {
 	    ATA_IDX_INSW_STRM(ch, ATA_DATA, (void*)buf, 1);
-	    ((uint8_t *)request->data + request->donecount +
-		(size & ~1))[0] = buf[0];
+	    (addr + (size & ~1))[0] = buf[0];
 	}
     } else
-	ATA_IDX_INSL_STRM(ch, ATA_DATA,
-			  (void*)((uintptr_t)request->data+request->donecount),
-			  size / sizeof(int32_t));
+	ATA_IDX_INSL_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int32_t));
 
     if (request->transfersize < length) {
 	device_printf(request->parent, "WARNING - %s read data overrun %d>%d\n",
@@ -867,23 +865,21 @@
 ata_pio_write(struct ata_request *request, int length)
 {
     struct ata_channel *ch = device_get_softc(request->parent);
+    uint8_t *addr;
     int size = min(request->transfersize, length);
     int resid;
     uint8_t buf[2];
 
-    if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t))) {
-	ATA_IDX_OUTSW_STRM(ch, ATA_DATA,
-			   (void*)((uintptr_t)request->data+request->donecount),
-			   size / sizeof(int16_t));
+    addr = (uint8_t *)request->data + request->donecount;
+    if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t)) ||
+	((uintptr_t)addr % sizeof(int32_t))) {
+	ATA_IDX_OUTSW_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int16_t));
 	if (size & 1) {
-	    buf[0] = ((uint8_t *)request->data + request->donecount +
-		(size & ~1))[0];
+	    buf[0] = (addr + (size & ~1))[0];
 	    ATA_IDX_OUTSW_STRM(ch, ATA_DATA, (void*)buf, 1);
 	}
     } else
-	ATA_IDX_OUTSL_STRM(ch, ATA_DATA,
-			   (void*)((uintptr_t)request->data+request->donecount),
-			   size / sizeof(int32_t));
+	ATA_IDX_OUTSL_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int32_t));
 
     if (request->transfersize < length) {
 	device_printf(request->parent, "WARNING - %s write data underrun %d>%d\n",
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ata/chipsets/ata-ite.c
--- a/head/sys/dev/ata/chipsets/ata-ite.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ata/chipsets/ata-ite.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ata/chipsets/ata-ite.c 233282 2012-03-21 16:59:39Z marius $");
+__FBSDID("$FreeBSD: head/sys/dev/ata/chipsets/ata-ite.c 237107 2012-06-14 22:19:23Z marius $");
 
 #include "opt_ata.h"
 #include <sys/param.h>
@@ -105,10 +105,10 @@
 
 	ctlr->setmode = ata_ite_821x_setmode;
 	/* No timing restrictions initally. */
-	ctlr->chipset_data = (void *)0;
+	ctlr->chipset_data = NULL;
     }
     ctlr->ch_attach = ata_ite_ch_attach;
-    return 0;
+    return (0);
 }
 
 static int
@@ -119,6 +119,9 @@
  
 	error = ata_pci_ch_attach(dev);
 	ch->flags |= ATA_CHECKS_CABLE;
+#ifdef ATA_CAM
+	ch->flags |= ATA_NO_ATAPI_DMA;
+#endif
 	return (error);
 }
 
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/ata/chipsets/ata-via.c
--- a/head/sys/dev/ata/chipsets/ata-via.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/ata/chipsets/ata-via.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/ata/chipsets/ata-via.c 233282 2012-03-21 16:59:39Z marius $");
+__FBSDID("$FreeBSD: head/sys/dev/ata/chipsets/ata-via.c 238203 2012-07-07 17:20:52Z eadler $");
 
 #include "opt_ata.h"
 #include <sys/param.h>
@@ -472,12 +472,10 @@
 static int
 ata_via_sata_scr_read(device_t dev, int port, int reg, u_int32_t *result)
 {
-	struct ata_channel *ch;
 	device_t parent;
 	uint32_t val;
 
 	parent = device_get_parent(dev);
-	ch = device_get_softc(dev);
 	port = (port == 1) ? 1 : 0;
 	switch (reg) {
 	case ATA_SSTATUS:
@@ -520,12 +518,10 @@
 static int
 ata_via_sata_scr_write(device_t dev, int port, int reg, u_int32_t value)
 {
-	struct ata_channel *ch;
 	device_t parent;
 	uint32_t val;
 
 	parent = device_get_parent(dev);
-	ch = device_get_softc(dev);
 	port = (port == 1) ? 1 : 0;
 	switch (reg) {
 	case ATA_SERROR:
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/atkbdc/atkbdc_isa.c
--- a/head/sys/dev/atkbdc/atkbdc_isa.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/atkbdc/atkbdc_isa.c	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/atkbdc/atkbdc_isa.c 233619 2012-03-28 17:58:37Z jkim $");
+__FBSDID("$FreeBSD: head/sys/dev/atkbdc/atkbdc_isa.c 238164 2012-07-06 12:13:28Z jhb $");
 
 #include "opt_kbd.h"
 
@@ -87,6 +87,7 @@
 
 static struct isa_pnp_id atkbdc_ids[] = {
 	{ 0x0303d041, "Keyboard controller (i8042)" },	/* PNP0303 */
+	{ 0x0b03d041, "Keyboard controller (i8042)" },	/* PNP030B */
 	{ 0x2003d041, "Keyboard controller (i8042)" },	/* PNP0320 */
 	{ 0 }
 };
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bce/if_bce.c
--- a/head/sys/dev/bce/if_bce.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bce/if_bce.c	Wed Jul 25 17:04:43 2012 +0300
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/bce/if_bce.c 234121 2012-04-11 06:34:25Z yongari $");
+__FBSDID("$FreeBSD: head/sys/dev/bce/if_bce.c 235816 2012-05-23 01:20:25Z yongari $");
 
 /*
  * The following controllers are supported by this driver:
@@ -364,6 +364,7 @@
 static void bce_get_rx_buffer_sizes(struct bce_softc *, int);
 static void bce_get_media			(struct bce_softc *);
 static void bce_init_media			(struct bce_softc *);
+static u32 bce_get_rphy_link		(struct bce_softc *);
 static void bce_dma_map_addr		(void *, bus_dma_segment_t *, int, int);
 static int  bce_dma_alloc			(device_t);
 static void bce_dma_free			(struct bce_softc *);
@@ -372,6 +373,7 @@
 /****************************************************************************/
 /* BCE Firmware Synchronization and Load                                    */
 /****************************************************************************/
+static void bce_fw_cap_init			(struct bce_softc *);
 static int  bce_fw_sync			(struct bce_softc *, u32);
 static void bce_load_rv2p_fw		(struct bce_softc *, u32 *, u32, u32);
 static void bce_load_cpu_fw		(struct bce_softc *,
@@ -418,6 +420,7 @@
 static int  bce_ifmedia_upd		(struct ifnet *);
 static int  bce_ifmedia_upd_locked	(struct ifnet *);
 static void bce_ifmedia_sts		(struct ifnet *, struct ifmediareq *);
+static void bce_ifmedia_sts_rphy	(struct bce_softc *, struct ifmediareq *);
 static void bce_init_locked		(struct bce_softc *);
 static void bce_init				(void *);
 static void bce_mgmt_init_locked	(struct bce_softc *sc);
@@ -527,7 +530,7 @@
 /* Allowable values are TRUE or FALSE. */
 static int bce_strict_rx_mtu = FALSE;
 TUNABLE_INT("hw.bce.strict_rx_mtu", &bce_strict_rx_mtu);
-SYSCTL_UINT(_hw_bce, OID_AUTO, loose_rx_mtu, CTLFLAG_RDTUN,
+SYSCTL_UINT(_hw_bce, OID_AUTO, strict_rx_mtu, CTLFLAG_RDTUN,
     &bce_strict_rx_mtu, 0,
     "Enable/Disable strict RX frame size checking");
 
@@ -757,6 +760,13 @@
 			printf("2.5G"); i++;
 		}
 
+		if (sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) {
+			if (i > 0) printf("|");
+			printf("Remote PHY(%s)",
+			    sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG ?
+			    "FIBER" : "TP"); i++;
+		}
+
 		if (sc->bce_flags & BCE_MFW_ENABLE_FLAG) {
 			if (i > 0) printf("|");
 			printf("MFW); MFW (%s)\n", sc->bce_mfw_ver);
@@ -1297,6 +1307,9 @@
 	if (val & BCE_PCICFG_MISC_STATUS_32BIT_DET)
 		sc->bce_flags |= BCE_PCI_32BIT_FLAG;
 
+	/* Find the media type for the adapter. */
+	bce_get_media(sc);
+
 	/* Reset controller and announce to bootcode that driver is present. */
 	if (bce_reset(sc, BCE_DRV_MSG_CODE_RESET)) {
 		BCE_PRINTF("%s(%d): Controller reset failed!\n",
@@ -1344,9 +1357,6 @@
 	/* Update statistics once every second. */
 	sc->bce_stats_ticks = 1000000 & 0xffff00;
 
-	/* Find the media type for the adapter. */
-	bce_get_media(sc);
-
 	/* Store data needed by PHY driver for backplane applications */
 	sc->bce_shared_hw_cfg = bce_shmem_rd(sc, BCE_SHARED_HW_CFG_CONFIG);
 	sc->bce_port_hw_cfg   = bce_shmem_rd(sc, BCE_PORT_HW_CFG_CONFIG);
@@ -1386,6 +1396,15 @@
 		ifp->if_capabilities = BCE_IF_CAPABILITIES;
 	}
 
+#if __FreeBSD_version >= 800505
+	/*
+	 * Introducing IFCAP_LINKSTATE didn't bump __FreeBSD_version
+	 * so it's approximate value.
+	 */
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0)
+		ifp->if_capabilities |= IFCAP_LINKSTATE;
+#endif
+
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/*
@@ -1409,14 +1428,52 @@
 	/* Handle any special PHY initialization for SerDes PHYs. */
 	bce_init_media(sc);
 
-	/* MII child bus by attaching the PHY. */
-	rc = mii_attach(dev, &sc->bce_miibus, ifp, bce_ifmedia_upd,
-	    bce_ifmedia_sts, BMSR_DEFCAPMASK, sc->bce_phy_addr,
-	    MII_OFFSET_ANY, MIIF_DOPAUSE);
-	if (rc != 0) {
-		BCE_PRINTF("%s(%d): attaching PHYs failed\n", __FILE__,
-		    __LINE__);
-		goto bce_attach_fail;
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0) {
+		ifmedia_init(&sc->bce_ifmedia, IFM_IMASK, bce_ifmedia_upd,
+		    bce_ifmedia_sts);
+		/*
+		 * We can't manually override remote PHY's link and assume
+		 * PHY port configuration(Fiber or TP) is not changed after
+		 * device attach.  This may not be correct though.
+		 */
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) != 0) {
+			if (sc->bce_phy_flags & BCE_PHY_2_5G_CAPABLE_FLAG) {
+				ifmedia_add(&sc->bce_ifmedia,
+				    IFM_ETHER | IFM_2500_SX, 0, NULL);
+				ifmedia_add(&sc->bce_ifmedia,
+				    IFM_ETHER | IFM_2500_SX | IFM_FDX, 0, NULL);
+			}
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_1000_SX, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL);
+		} else {
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_10_T, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_100_TX, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_1000_T, 0, NULL);
+			ifmedia_add(&sc->bce_ifmedia,
+			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
+		}
+		ifmedia_add(&sc->bce_ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL);
+		ifmedia_set(&sc->bce_ifmedia, IFM_ETHER | IFM_AUTO);
+		sc->bce_ifmedia.ifm_media = sc->bce_ifmedia.ifm_cur->ifm_media;
+	} else {
+		/* MII child bus by attaching the PHY. */
+		rc = mii_attach(dev, &sc->bce_miibus, ifp, bce_ifmedia_upd,
+		    bce_ifmedia_sts, BMSR_DEFCAPMASK, sc->bce_phy_addr,
+		    MII_OFFSET_ANY, MIIF_DOPAUSE);
+		if (rc != 0) {
+			BCE_PRINTF("%s(%d): attaching PHYs failed\n", __FILE__,
+			    __LINE__);
+			goto bce_attach_fail;
+		}
 	}
 
 	/* Attach to the Ethernet interface list. */
@@ -1521,8 +1578,12 @@
 	ether_ifdetach(ifp);
 
 	/* If we have a child device on the MII bus remove it too. */
-	bus_generic_detach(dev);
-	device_delete_child(dev, sc->bce_miibus);
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0)
+		ifmedia_removeall(&sc->bce_ifmedia);
+	else {
+		bus_generic_detach(dev);
+		device_delete_child(dev, sc->bce_miibus);
+	}
 
 	/* Release all remaining resources. */
 	bce_release_resources(sc);
@@ -1983,13 +2044,28 @@
 {
 	struct bce_softc *sc;
 	struct mii_data *mii;
-	int val;
+	struct ifmediareq ifmr;
+	int media_active, media_status, val;
 
 	sc = device_get_softc(dev);
 
 	DBENTER(BCE_VERBOSE_PHY);
 
-	mii = device_get_softc(sc->bce_miibus);
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0) {
+		bzero(&ifmr, sizeof(ifmr));
+		bce_ifmedia_sts_rphy(sc, &ifmr);
+		media_active = ifmr.ifm_active;
+		media_status = ifmr.ifm_status;
+	} else {
+		mii = device_get_softc(sc->bce_miibus);
+		media_active = mii->mii_media_active;
+		media_status = mii->mii_media_status;
+	}
+
+	/* Ignore invalid media status. */
+	if ((media_status & (IFM_ACTIVE | IFM_AVALID)) !=
+	    (IFM_ACTIVE | IFM_AVALID))
+		goto bce_miibus_statchg_exit;
 
 	val = REG_RD(sc, BCE_EMAC_MODE);
 	val &= ~(BCE_EMAC_MODE_PORT | BCE_EMAC_MODE_HALF_DUPLEX |
@@ -1997,7 +2073,7 @@
 	    BCE_EMAC_MODE_25G);
 
 	/* Set MII or GMII interface based on the PHY speed. */
-	switch (IFM_SUBTYPE(mii->mii_media_active)) {
+	switch (IFM_SUBTYPE(media_active)) {
 	case IFM_10_T:
 		if (BCE_CHIP_NUM(sc) != BCE_CHIP_NUM_5706) {
 			DBPRINT(sc, BCE_INFO_PHY,
@@ -2026,7 +2102,7 @@
 	}
 
 	/* Set half or full duplex based on PHY settings. */
-	if ((mii->mii_media_active & IFM_GMASK) == IFM_HDX) {
+	if ((IFM_OPTIONS(media_active) & IFM_FDX) == 0) {
 		DBPRINT(sc, BCE_INFO_PHY,
 		    "Setting Half-Duplex interface.\n");
 		val |= BCE_EMAC_MODE_HALF_DUPLEX;
@@ -2036,7 +2112,7 @@
 
 	REG_WR(sc, BCE_EMAC_MODE, val);
 
- 	if ((mii->mii_media_active & IFM_ETH_RXPAUSE) != 0) {
+	if ((IFM_OPTIONS(media_active) & IFM_ETH_RXPAUSE) != 0) {
 		DBPRINT(sc, BCE_INFO_PHY,
 		    "%s(): Enabling RX flow control.\n", __FUNCTION__);
 		BCE_SETBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN);
@@ -2046,7 +2122,7 @@
 		BCE_CLRBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN);
 	}
 
- 	if ((mii->mii_media_active & IFM_ETH_TXPAUSE) != 0) {
+	if ((IFM_OPTIONS(media_active) & IFM_ETH_TXPAUSE) != 0) {
 		DBPRINT(sc, BCE_INFO_PHY,
 		    "%s(): Enabling TX flow control.\n", __FUNCTION__);
 		BCE_SETBIT(sc, BCE_EMAC_TX_MODE, BCE_EMAC_TX_MODE_FLOW_EN);
@@ -2060,6 +2136,7 @@
 
 	/* ToDo: Update watermarks in bce_init_rx_context(). */
 
+bce_miibus_statchg_exit:
 	DBEXIT(BCE_VERBOSE_PHY);
 }
 
@@ -3130,7 +3207,8 @@
 static void
 bce_init_media(struct bce_softc *sc)
 {
-	if ((sc->bce_phy_flags & BCE_PHY_IEEE_CLAUSE_45_FLAG) != 0) {
+	if ((sc->bce_phy_flags & (BCE_PHY_IEEE_CLAUSE_45_FLAG |
+	    BCE_PHY_REMOTE_CAP_FLAG)) == BCE_PHY_IEEE_CLAUSE_45_FLAG) {
 		/*
 		 * Configure 5709S/5716S PHYs to use traditional IEEE
 		 * Clause 22 method. Otherwise we have no way to attach
@@ -4925,14 +5003,25 @@
 static int
 bce_reset(struct bce_softc *sc, u32 reset_code)
 {
-	u32 val;
+	u32 emac_mode_save, val;
 	int i, rc = 0;
+	static const u32 emac_mode_mask = BCE_EMAC_MODE_PORT |
+	    BCE_EMAC_MODE_HALF_DUPLEX | BCE_EMAC_MODE_25G;
 
 	DBENTER(BCE_VERBOSE_RESET);
 
 	DBPRINT(sc, BCE_VERBOSE_RESET, "%s(): reset_code = 0x%08X\n",
 	    __FUNCTION__, reset_code);
 
+	/*
+	 * If ASF/IPMI is operational, then the EMAC Mode register already
+	 * contains appropriate values for the link settings that have
+	 * been auto-negotiated.  Resetting the chip will clobber those
+	 * values.  Save the important bits so we can restore them after
+	 * the reset.
+	 */
+	emac_mode_save = REG_RD(sc, BCE_EMAC_MODE) & emac_mode_mask;
+
 	/* Wait for pending PCI transactions to complete. */
 	REG_WR(sc, BCE_MISC_ENABLE_CLR_BITS,
 	    BCE_MISC_ENABLE_CLR_BITS_TX_DMA_ENABLE |
@@ -5018,8 +5107,15 @@
 	if (rc)
 		BCE_PRINTF("%s(%d): Firmware did not complete "
 		    "initialization!\n", __FILE__, __LINE__);
+	/* Get firmware capabilities. */
+	bce_fw_cap_init(sc);
 
 bce_reset_exit:
+	/* Restore EMAC Mode bits needed to keep ASF/IPMI running. */
+	val = REG_RD(sc, BCE_EMAC_MODE);
+	val = (val & ~emac_mode_mask) | emac_mode_save;
+	REG_WR(sc, BCE_EMAC_MODE, val);
+
 	DBEXIT(BCE_VERBOSE_RESET);
 	return (rc);
 }
@@ -6081,6 +6177,55 @@
 }
 
 
+static u32
+bce_get_rphy_link(struct bce_softc *sc)
+{
+	u32 advertise, link;
+	int fdpx;
+
+	advertise = 0;
+	fdpx = 0;
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) != 0)
+		link = bce_shmem_rd(sc, BCE_RPHY_SERDES_LINK);
+	else
+		link = bce_shmem_rd(sc, BCE_RPHY_COPPER_LINK);
+	if (link & BCE_NETLINK_ANEG_ENB)
+		advertise |= BCE_NETLINK_ANEG_ENB;
+	if (link & BCE_NETLINK_SPEED_10HALF)
+		advertise |= BCE_NETLINK_SPEED_10HALF;
+	if (link & BCE_NETLINK_SPEED_10FULL) {
+		advertise |= BCE_NETLINK_SPEED_10FULL;
+		fdpx++;
+	}
+	if (link & BCE_NETLINK_SPEED_100HALF)
+		advertise |= BCE_NETLINK_SPEED_100HALF;
+	if (link & BCE_NETLINK_SPEED_100FULL) {
+		advertise |= BCE_NETLINK_SPEED_100FULL;
+		fdpx++;
+	}
+	if (link & BCE_NETLINK_SPEED_1000HALF)
+		advertise |= BCE_NETLINK_SPEED_1000HALF;
+	if (link & BCE_NETLINK_SPEED_1000FULL) {
+		advertise |= BCE_NETLINK_SPEED_1000FULL;
+		fdpx++;
+	}
+	if (link & BCE_NETLINK_SPEED_2500HALF)
+		advertise |= BCE_NETLINK_SPEED_2500HALF;
+	if (link & BCE_NETLINK_SPEED_2500FULL) {
+		advertise |= BCE_NETLINK_SPEED_2500FULL;
+		fdpx++;
+	}
+	if (fdpx)
+		advertise |= BCE_NETLINK_FC_PAUSE_SYM |
+		    BCE_NETLINK_FC_PAUSE_ASYM;
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0)
+		advertise |= BCE_NETLINK_PHY_APP_REMOTE |
+		    BCE_NETLINK_ETH_AT_WIRESPEED;
+
+	return (advertise);
+}
+
+
 /****************************************************************************/
 /* Set media options.                                                       */
 /*                                                                          */
@@ -6116,21 +6261,110 @@
 	struct bce_softc *sc = ifp->if_softc;
 	struct mii_data *mii;
 	struct mii_softc *miisc;
-	int error;
+	struct ifmedia *ifm;
+	u32 link;
+	int error, fdx;
 
 	DBENTER(BCE_VERBOSE_PHY);
 
 	error = 0;
 	BCE_LOCK_ASSERT(sc);
 
-	mii = device_get_softc(sc->bce_miibus);
-
-	/* Make sure the MII bus has been enumerated. */
-	if (mii) {
-		sc->bce_link_up = FALSE;
-		LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
-		    PHY_RESET(miisc);
-		error = mii_mediachg(mii);
+	sc->bce_link_up = FALSE;
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0) {
+		ifm = &sc->bce_ifmedia;
+		if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
+			return (EINVAL);
+		link = 0;
+		fdx = IFM_OPTIONS(ifm->ifm_media) & IFM_FDX;
+		switch(IFM_SUBTYPE(ifm->ifm_media)) {
+		case IFM_AUTO:
+			/*
+			 * Check advertised link of remote PHY by reading
+			 * BCE_RPHY_SERDES_LINK or BCE_RPHY_COPPER_LINK.
+			 * Always use the same link type of remote PHY.
+			 */
+			link = bce_get_rphy_link(sc);
+			break;
+		case IFM_2500_SX:
+			if ((sc->bce_phy_flags &
+			    (BCE_PHY_REMOTE_PORT_FIBER_FLAG |
+			    BCE_PHY_2_5G_CAPABLE_FLAG)) == 0)
+				return (EINVAL);
+			/*
+			 * XXX
+			 * Have to enable forced 2.5Gbps configuration.
+			 */
+			if (fdx != 0)
+				link |= BCE_NETLINK_SPEED_2500FULL;
+			else
+				link |= BCE_NETLINK_SPEED_2500HALF;
+			break;
+		case IFM_1000_SX:
+			if ((sc->bce_phy_flags &
+			    BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0)
+				return (EINVAL);
+			/*
+			 * XXX
+			 * Have to disable 2.5Gbps configuration.
+			 */
+			if (fdx != 0)
+				link = BCE_NETLINK_SPEED_1000FULL;
+			else
+				link = BCE_NETLINK_SPEED_1000HALF;
+			break;
+		case IFM_1000_T:
+			if (sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG)
+				return (EINVAL);
+			if (fdx != 0)
+				link = BCE_NETLINK_SPEED_1000FULL;
+			else
+				link = BCE_NETLINK_SPEED_1000HALF;
+			break;
+		case IFM_100_TX:
+			if (sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG)
+				return (EINVAL);
+			if (fdx != 0)
+				link = BCE_NETLINK_SPEED_100FULL;
+			else
+				link = BCE_NETLINK_SPEED_100HALF;
+			break;
+		case IFM_10_T:
+			if (sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG)
+				return (EINVAL);
+			if (fdx != 0)
+				link = BCE_NETLINK_SPEED_10FULL;
+			else
+				link = BCE_NETLINK_SPEED_10HALF;
+			break;
+		default:
+			return (EINVAL);
+		}
+		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO) {
+			/*
+			 * XXX
+			 * Advertise pause capability for full-duplex media.
+			 */
+			if (fdx != 0)
+				link |= BCE_NETLINK_FC_PAUSE_SYM |
+				    BCE_NETLINK_FC_PAUSE_ASYM;
+			if ((sc->bce_phy_flags &
+			    BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0)
+				link |= BCE_NETLINK_PHY_APP_REMOTE |
+				    BCE_NETLINK_ETH_AT_WIRESPEED;
+		}
+
+		bce_shmem_wr(sc, BCE_MB_ARGS_0, link);
+		error = bce_fw_sync(sc, BCE_DRV_MSG_CODE_CMD_SET_LINK);
+	} else {
+		mii = device_get_softc(sc->bce_miibus);
+
+		/* Make sure the MII bus has been enumerated. */
+		if (mii) {
+			LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
+				PHY_RESET(miisc);
+			error = mii_mediachg(mii);
+		}
 	}
 
 	DBEXIT(BCE_VERBOSE_PHY);
@@ -6138,6 +6372,85 @@
 }
 
 
+static void
+bce_ifmedia_sts_rphy(struct bce_softc *sc, struct ifmediareq *ifmr)
+{
+	struct ifnet *ifp;
+	u32 link;
+
+	ifp = sc->bce_ifp;
+	BCE_LOCK_ASSERT(sc);
+
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = IFM_ETHER;
+	link = bce_shmem_rd(sc, BCE_LINK_STATUS);
+	/* XXX Handle heart beat status? */
+	if ((link & BCE_LINK_STATUS_LINK_UP) != 0)
+		ifmr->ifm_status |= IFM_ACTIVE;
+	else {
+		ifmr->ifm_active |= IFM_NONE;
+		ifp->if_baudrate = 0;
+		return;
+	}
+	switch (link & BCE_LINK_STATUS_SPEED_MASK) {
+	case BCE_LINK_STATUS_10HALF:
+		ifmr->ifm_active |= IFM_10_T | IFM_HDX;
+		ifp->if_baudrate = IF_Mbps(10UL);
+		break;
+	case BCE_LINK_STATUS_10FULL:
+		ifmr->ifm_active |= IFM_10_T | IFM_FDX;
+		ifp->if_baudrate = IF_Mbps(10UL);
+		break;
+	case BCE_LINK_STATUS_100HALF:
+		ifmr->ifm_active |= IFM_100_TX | IFM_HDX;
+		ifp->if_baudrate = IF_Mbps(100UL);
+		break;
+	case BCE_LINK_STATUS_100FULL:
+		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
+		ifp->if_baudrate = IF_Mbps(100UL);
+		break;
+	case BCE_LINK_STATUS_1000HALF:
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0)
+			ifmr->ifm_active |= IFM_1000_T | IFM_HDX;
+		else
+			ifmr->ifm_active |= IFM_1000_SX | IFM_HDX;
+		ifp->if_baudrate = IF_Mbps(1000UL);
+		break;
+	case BCE_LINK_STATUS_1000FULL:
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0)
+			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
+		else
+			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
+		ifp->if_baudrate = IF_Mbps(1000UL);
+		break;
+	case BCE_LINK_STATUS_2500HALF:
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0) {
+			ifmr->ifm_active |= IFM_NONE;
+			return;
+		} else
+			ifmr->ifm_active |= IFM_2500_SX | IFM_HDX;
+		ifp->if_baudrate = IF_Mbps(2500UL);
+		break;
+	case BCE_LINK_STATUS_2500FULL:
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_PORT_FIBER_FLAG) == 0) {
+			ifmr->ifm_active |= IFM_NONE;
+			return;
+		} else
+			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
+		ifp->if_baudrate = IF_Mbps(2500UL);
+		break;
+	default:
+		ifmr->ifm_active |= IFM_NONE;
+		return;
+	}
+
+	if ((link & BCE_LINK_STATUS_RX_FC_ENABLED) != 0)
+		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
+	if ((link & BCE_LINK_STATUS_TX_FC_ENABLED) != 0)
+		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
+}
+
+
 /****************************************************************************/
 /* Reports current media status.                                            */
 /*                                                                          */
@@ -6158,11 +6471,15 @@
 		BCE_UNLOCK(sc);
 		return;
 	}
-	mii = device_get_softc(sc->bce_miibus);
-
-	mii_pollstat(mii);
-	ifmr->ifm_active = mii->mii_media_active;
-	ifmr->ifm_status = mii->mii_media_status;
+
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0)
+		bce_ifmedia_sts_rphy(sc, ifmr);
+	else {
+		mii = device_get_softc(sc->bce_miibus);
+		mii_pollstat(mii);
+		ifmr->ifm_active = mii->mii_media_active;
+		ifmr->ifm_status = mii->mii_media_status;
+	}
 
 	BCE_UNLOCK(sc);
 
@@ -6199,14 +6516,26 @@
 			    STATUS_ATTN_BITS_LINK_STATE);
 			DBPRINT(sc, BCE_INFO_PHY, "%s(): Link is now UP.\n",
 			    __FUNCTION__);
-		}
-		else {
+		} else {
 			REG_WR(sc, BCE_PCICFG_STATUS_BIT_CLEAR_CMD,
 			    STATUS_ATTN_BITS_LINK_STATE);
 			DBPRINT(sc, BCE_INFO_PHY, "%s(): Link is now DOWN.\n",
 			    __FUNCTION__);
 		}
 
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0) {
+			if (new_link_state) {
+				if (bootverbose)
+					if_printf(sc->bce_ifp, "link UP\n");
+				if_link_state_change(sc->bce_ifp,
+				    LINK_STATE_UP);
+			} else {
+				if (bootverbose)
+					if_printf(sc->bce_ifp, "link DOWN\n");
+				if_link_state_change(sc->bce_ifp,
+				    LINK_STATE_DOWN);
+			}
+		}
 		/*
 		 * Assume link is down and allow
 		 * tick routine to update the state
@@ -6842,6 +7171,8 @@
 	bcopy(IF_LLADDR(sc->bce_ifp), sc->eaddr, ETHER_ADDR_LEN);
 	bce_set_mac_addr(sc);
 
+	if (bce_hdr_split == FALSE)
+		bce_get_rx_buffer_sizes(sc, ifp->if_mtu);
 	/*
 	 * Calculate and program the hardware Ethernet MTU
  	 * size. Be generous on the receive if we have room
@@ -7436,22 +7767,10 @@
 
 		BCE_LOCK(sc);
 		ifp->if_mtu = ifr->ifr_mtu;
-
-		if (bce_hdr_split == FALSE) {
-			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-				/*
-				 * Because allocation size is used in RX
-				 * buffer allocation, stop controller if
-				 * it is already running.
-				 */
-				bce_stop(sc);
-			}
-
-			bce_get_rx_buffer_sizes(sc, ifp->if_mtu);
-
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			bce_init_locked(sc);
 		}
-
 		BCE_UNLOCK(sc);
 		break;
 
@@ -7505,10 +7824,14 @@
 	case SIOCGIFMEDIA:
 		DBPRINT(sc, BCE_VERBOSE_MISC,
 		    "Received SIOCSIFMEDIA/SIOCGIFMEDIA\n");
-
-		mii = device_get_softc(sc->bce_miibus);
-		error = ifmedia_ioctl(ifp, ifr,
-		    &mii->mii_media, command);
+		if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0)
+			error = ifmedia_ioctl(ifp, ifr, &sc->bce_ifmedia,
+			    command);
+		else {
+			mii = device_get_softc(sc->bce_miibus);
+			error = ifmedia_ioctl(ifp, ifr, &mii->mii_media,
+			    command);
+		}
 		break;
 
 	/* Set interface capability */
@@ -8173,6 +8496,7 @@
 	struct bce_softc *sc = xsc;
 	struct mii_data *mii;
 	struct ifnet *ifp;
+	struct ifmediareq ifmr;
 
 	ifp = sc->bce_ifp;
 
@@ -8203,21 +8527,32 @@
 		goto bce_tick_exit;
 
 	/* Link is down.  Check what the PHY's doing. */
-	mii = device_get_softc(sc->bce_miibus);
-	mii_tick(mii);
-
-	/* Check if the link has come up. */
-	if ((mii->mii_media_status & IFM_ACTIVE) &&
-	    (IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE)) {
-		DBPRINT(sc, BCE_VERBOSE_MISC,
-		    "%s(): Link up!\n", __FUNCTION__);
-		sc->bce_link_up = TRUE;
-		if ((IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
-		    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX ||
-		    IFM_SUBTYPE(mii->mii_media_active) == IFM_2500_SX) &&
-		    (bce_verbose || bootverbose))
-			BCE_PRINTF("Gigabit link up!\n");
-
+	if ((sc->bce_phy_flags & BCE_PHY_REMOTE_CAP_FLAG) != 0) {
+		bzero(&ifmr, sizeof(ifmr));
+		bce_ifmedia_sts_rphy(sc, &ifmr);
+		if ((ifmr.ifm_status & (IFM_ACTIVE | IFM_AVALID)) ==
+		    (IFM_ACTIVE | IFM_AVALID)) {
+			sc->bce_link_up = TRUE;
+			bce_miibus_statchg(sc->bce_dev);
+		}
+	} else {
+		mii = device_get_softc(sc->bce_miibus);
+		mii_tick(mii);
+		/* Check if the link has come up. */
+		if ((mii->mii_media_status & IFM_ACTIVE) &&
+		    (IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE)) {
+			DBPRINT(sc, BCE_VERBOSE_MISC, "%s(): Link up!\n",
+			    __FUNCTION__);
+			sc->bce_link_up = TRUE;
+			if ((IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
+			    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX ||
+			    IFM_SUBTYPE(mii->mii_media_active) == IFM_2500_SX) &&
+			    (bce_verbose || bootverbose))
+				BCE_PRINTF("Gigabit link up!\n");
+		}
+
+	}
+	if (sc->bce_link_up == TRUE) {
 		/* Now that link is up, handle any outstanding TX traffic. */
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
 			DBPRINT(sc, BCE_VERBOSE_MISC, "%s(): Found "
@@ -8231,6 +8566,36 @@
 	return;
 }
 
+static void
+bce_fw_cap_init(struct bce_softc *sc)
+{
+	u32 ack, cap, link;
+
+	ack = 0;
+	cap = bce_shmem_rd(sc, BCE_FW_CAP_MB);
+	if ((cap & BCE_FW_CAP_SIGNATURE_MAGIC_MASK) !=
+	    BCE_FW_CAP_SIGNATURE_MAGIC)
+		return;
+	if ((cap & (BCE_FW_CAP_MFW_KEEP_VLAN | BCE_FW_CAP_BC_KEEP_VLAN)) ==
+	    (BCE_FW_CAP_MFW_KEEP_VLAN | BCE_FW_CAP_BC_KEEP_VLAN))
+		ack |= BCE_DRV_ACK_CAP_SIGNATURE_MAGIC |
+		    BCE_FW_CAP_MFW_KEEP_VLAN | BCE_FW_CAP_BC_KEEP_VLAN;
+	if ((sc->bce_phy_flags & BCE_PHY_SERDES_FLAG) != 0 &&
+	    (cap & BCE_FW_CAP_REMOTE_PHY_CAP) != 0) {
+		sc->bce_phy_flags &= ~BCE_PHY_REMOTE_PORT_FIBER_FLAG;
+		sc->bce_phy_flags |= BCE_PHY_REMOTE_CAP_FLAG;
+		link = bce_shmem_rd(sc, BCE_LINK_STATUS);
+		if ((link & BCE_LINK_STATUS_SERDES_LINK) != 0)
+			sc->bce_phy_flags |= BCE_PHY_REMOTE_PORT_FIBER_FLAG;
+		ack |= BCE_DRV_ACK_CAP_SIGNATURE_MAGIC |
+		    BCE_FW_CAP_REMOTE_PHY_CAP;
+	}
+
+	if (ack != 0)
+		bce_shmem_wr(sc, BCE_DRV_ACK_CAP_MB, ack);
+}
+
+
 #ifdef BCE_DEBUG
 /****************************************************************************/
 /* Allows the driver state to be dumped through the sysctl interface.       */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bce/if_bcereg.h
--- a/head/sys/dev/bce/if_bcereg.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bce/if_bcereg.h	Wed Jul 25 17:04:43 2012 +0300
@@ -26,7 +26,7 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/bce/if_bcereg.h 234121 2012-04-11 06:34:25Z yongari $
+ * $FreeBSD: head/sys/dev/bce/if_bcereg.h 235151 2012-05-09 01:55:23Z yongari $
  */
 
 #ifndef	_BCEREG_H_DEFINED
@@ -814,6 +814,23 @@
 #define BCE_DRV_PULSE_SEQ_MASK			 0x00007fff
 
 #define BCE_MB_ARGS_0				0x00000014
+#define	BCE_NETLINK_SPEED_10HALF		 (1<<0)
+#define	BCE_NETLINK_SPEED_10FULL		 (1<<1)
+#define	BCE_NETLINK_SPEED_100HALF		 (1<<2)
+#define	BCE_NETLINK_SPEED_100FULL		 (1<<3)
+#define	BCE_NETLINK_SPEED_1000HALF		 (1<<4)
+#define	BCE_NETLINK_SPEED_1000FULL		 (1<<5)
+#define	BCE_NETLINK_SPEED_2500HALF		 (1<<6)
+#define	BCE_NETLINK_SPEED_2500FULL		 (1<<7)
+#define	BCE_NETLINK_SPEED_10GHALF		 (1<<8)
+#define	BCE_NETLINK_SPEED_10GFULL		 (1<<9)
+#define	BCE_NETLINK_ANEG_ENB		 	 (1<<10)
+#define	BCE_NETLINK_PHY_APP_REMOTE	 	 (1<<11)
+#define	BCE_NETLINK_FC_PAUSE_SYM	 	 (1<<12)
+#define	BCE_NETLINK_FC_PAUSE_ASYM	 	 (1<<13)
+#define	BCE_NETLINK_ETH_AT_WIRESPEED	 	 (1<<14)
+#define	BCE_NETLINK_PHY_RESET	 	 	 (1<<15)
+
 #define BCE_MB_ARGS_1				0x00000018
 
 /* Indicate to the firmware not to go into the
@@ -1079,6 +1096,26 @@
 #define BCE_BC_STATE_BC_DBG_CMD_LOOP_CNT_MASK	0xffff
 #define BCE_BC_STATE_BC_DBG_CMD_LOOP_INFINITE	0xffff
 
+#define	BCE_FW_EVT_CODE_MB			0x00000354
+#define	BCE_FW_EVT_CODE_SW_TIMER_EXPIRE_EVENT	0x00000000
+#define	BCE_FW_EVT_CODE_LINK_EVENT		0x00000001
+
+#define	BCE_DRV_ACK_CAP_MB			0x00000364
+#define	BCE_DRV_ACK_CAP_SIGNATURE_MAGIC		0x35450000
+
+#define	BCE_FW_CAP_MB				0x00000368
+#define	BCE_FW_CAP_SIGNATURE_MAGIC		0xaa550000
+#define	BCE_FW_ACK_SIGNATURE_MAGIC		0x52500000
+#define	BCE_FW_CAP_SIGNATURE_MAGIC_MASK		0xffff0000
+#define	BCE_FW_CAP_REMOTE_PHY_CAP		0x00000001
+#define	BCE_FW_CAP_REMOTE_PHY_PRESENT		0x00000002
+#define	BCE_FW_CAP_MFW_KEEP_VLAN		0x00000008
+#define	BCE_FW_CAP_BC_KEEP_VLAN			0x00000010
+
+#define	BCE_RPHY_SERDES_LINK			0x00000374
+
+#define	BCE_RPHY_COPPER_LINK			0x00000378
+
 #define HOST_VIEW_SHMEM_BASE			0x167c00
 
 /*
@@ -6454,6 +6491,8 @@
 #define BCE_PHY_INT_MODE_AUTO_POLLING_FLAG	0x00000100
 #define BCE_PHY_INT_MODE_LINK_READY_FLAG	0x00000200
 #define BCE_PHY_IEEE_CLAUSE_45_FLAG		0x00000400
+#define	BCE_PHY_REMOTE_CAP_FLAG			0x00000800
+#define	BCE_PHY_REMOTE_PORT_FIBER_FLAG		0x00001000
 
 	/* Values that need to be shared with the PHY driver. */
 	u32			bce_shared_hw_cfg;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bge/if_bge.c
--- a/head/sys/dev/bge/if_bge.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bge/if_bge.c	Wed Jul 25 17:04:43 2012 +0300
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/bge/if_bge.c 232850 2012-03-12 03:47:30Z yongari $");
+__FBSDID("$FreeBSD: head/sys/dev/bge/if_bge.c 236488 2012-06-02 19:41:28Z marius $");
 
 /*
  * Broadcom BCM570x family gigabit ethernet driver for FreeBSD.
@@ -2368,7 +2368,6 @@
 	if (sc->bge_cdata.bge_tx_mtag)
 		bus_dma_tag_destroy(sc->bge_cdata.bge_tx_mtag);
 
-
 	/* Destroy standard RX ring. */
 	if (sc->bge_cdata.bge_rx_std_ring_map)
 		bus_dmamap_unload(sc->bge_cdata.bge_rx_std_ring_tag,
@@ -2767,9 +2766,8 @@
 	};
 	devclass_t pci, pcib;
 	device_t bus, dev;
-	int count, i;
-
-	count = sizeof(mbox_reorder_lists) / sizeof(mbox_reorder_lists[0]);
+	int i;
+
 	pci = devclass_find("pci");
 	pcib = devclass_find("pcib");
 	dev = sc->bge_dev;
@@ -2777,17 +2775,9 @@
 	for (;;) {
 		dev = device_get_parent(bus);
 		bus = device_get_parent(dev);
-		device_printf(sc->bge_dev, "dev : %s%d, bus : %s%d\n",
-		    device_get_name(dev), device_get_unit(dev),
-		    device_get_name(bus), device_get_unit(bus));
 		if (device_get_devclass(dev) != pcib)
 			break;
-		for (i = 0; i < count; i++) {
-			device_printf(sc->bge_dev,
-			    "probing dev : %s%d, vendor : 0x%04x "
-			    "device : 0x%04x\n",
-			    device_get_name(dev), device_get_unit(dev),
-			    pci_get_vendor(dev), pci_get_device(dev));
+		for (i = 0; i < nitems(mbox_reorder_lists); i++) {
 			if (pci_get_vendor(dev) ==
 			    mbox_reorder_lists[i].vendor &&
 			    pci_get_device(dev) ==
@@ -2869,8 +2859,6 @@
 	sc = device_get_softc(dev);
 	sc->bge_dev = dev;
 
-	bge_add_sysctls(sc);
-
 	TASK_INIT(&sc->bge_intr_task, 0, bge_intr_task, sc);
 
 	/*
@@ -3016,6 +3004,9 @@
 		break;
 	}
 
+	/* Add SYSCTLs, requires the chipset family to be set. */
+	bge_add_sysctls(sc);
+
 	/* Set various PHY bug flags. */
 	if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 ||
 	    sc->bge_chipid == BGE_CHIPID_BCM5701_B0)
@@ -3638,8 +3629,6 @@
 		/* Clear enable no snoop and disable relaxed ordering. */
 		devctl &= ~(PCIM_EXP_CTL_RELAXED_ORD_ENABLE |
 		    PCIM_EXP_CTL_NOSNOOP_ENABLE);
-		/* Set PCIE max payload size to 128. */
-		devctl &= ~PCIM_EXP_CTL_MAX_PAYLOAD;
 		pci_write_config(dev, sc->bge_expcap + PCIR_EXPRESS_DEVICE_CTL,
 		    devctl, 2);
 		/* Clear error status. */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bge/if_bgereg.h
--- a/head/sys/dev/bge/if_bgereg.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bge/if_bgereg.h	Wed Jul 25 17:04:43 2012 +0300
@@ -30,7 +30,7 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/bge/if_bgereg.h 232850 2012-03-12 03:47:30Z yongari $
+ * $FreeBSD: head/sys/dev/bge/if_bgereg.h 236701 2012-06-07 03:22:20Z yongari $
  */
 
 /*
@@ -779,11 +779,11 @@
 #define	BGE_LEDCTL_10MBPS_LED		0x00000008
 #define	BGE_LEDCTL_TRAFLED_OVERRIDE	0x00000010
 #define	BGE_LEDCTL_TRAFLED_BLINK	0x00000020
-#define	BGE_LEDCTL_TREFLED_BLINK_2	0x00000040
+#define	BGE_LEDCTL_TRAFLED_BLINK_2	0x00000040
 #define	BGE_LEDCTL_1000MBPS_STS		0x00000080
 #define	BGE_LEDCTL_100MBPS_STS		0x00000100
 #define	BGE_LEDCTL_10MBPS_STS		0x00000200
-#define	BGE_LEDCTL_TRADLED_STS		0x00000400
+#define	BGE_LEDCTL_TRAFLED_STS		0x00000400
 #define	BGE_LEDCTL_BLINKPERIOD		0x7FF80000
 #define	BGE_LEDCTL_BLINKPERIOD_OVERRIDE	0x80000000
 
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bwi/bwimac.c
--- a/head/sys/dev/bwi/bwimac.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bwi/bwimac.c	Wed Jul 25 17:04:43 2012 +0300
@@ -35,10 +35,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/bwi/bwimac.c 235338 2012-05-12 15:11:53Z adrian $");
 
 #include "opt_inet.h"
 #include "opt_bwi.h"
+#include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/endian.h>
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bwi/bwiphy.c
--- a/head/sys/dev/bwi/bwiphy.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bwi/bwiphy.c	Wed Jul 25 17:04:43 2012 +0300
@@ -35,9 +35,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/bwi/bwiphy.c 226182 2011-10-10 02:54:58Z adrian $");
+__FBSDID("$FreeBSD: head/sys/dev/bwi/bwiphy.c 235338 2012-05-12 15:11:53Z adrian $");
 
 #include "opt_inet.h"
+#include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/endian.h>
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bwi/bwirf.c
--- a/head/sys/dev/bwi/bwirf.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bwi/bwirf.c	Wed Jul 25 17:04:43 2012 +0300
@@ -35,10 +35,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/bwi/bwirf.c 235338 2012-05-12 15:11:53Z adrian $");
 
 #include "opt_inet.h"
 #include "opt_bwi.h"
+#include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/endian.h>
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bwi/if_bwi.c
--- a/head/sys/dev/bwi/if_bwi.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bwi/if_bwi.c	Wed Jul 25 17:04:43 2012 +0300
@@ -35,10 +35,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/bwi/if_bwi.c 228621 2011-12-17 10:23:17Z bschmidt $");
+__FBSDID("$FreeBSD: head/sys/dev/bwi/if_bwi.c 235338 2012-05-12 15:11:53Z adrian $");
 
 #include "opt_inet.h"
 #include "opt_bwi.h"
+#include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/endian.h>
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/bwi/if_bwi_pci.c
--- a/head/sys/dev/bwi/if_bwi_pci.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/bwi/if_bwi_pci.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,12 +28,14 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/bwi/if_bwi_pci.c 235338 2012-05-12 15:11:53Z adrian $");
 
 /*
  * PCI/Cardbus front-end for the Broadcom Wireless LAN controller driver.
  */
 
+#include "opt_wlan.h"
+
 #include <sys/param.h>
 #include <sys/systm.h> 
 #include <sys/module.h>
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/common/cxgb_ctl_defs.h
--- a/head/sys/dev/cxgb/common/cxgb_ctl_defs.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/common/cxgb_ctl_defs.h	Wed Jul 25 17:04:43 2012 +0300
@@ -6,7 +6,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
  * release for licensing terms and conditions.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/dev/cxgb/common/cxgb_ctl_defs.h 237263 2012-06-19 07:34:13Z np $
  */
 
 #ifndef _CXGB3_OFFLOAD_CTL_DEFS_H
@@ -60,14 +60,12 @@
 	const unsigned short *mtus; /* the MTU table values */
 };
 
-struct net_device;
-
 /*
- * Structure used to request the adapter net_device owning a given MAC address.
+ * Structure used to request the ifnet that owns a given MAC address.
  */
 struct iff_mac {
-	struct net_device *dev;          /* the net_device */
-	const unsigned char *mac_addr;   /* MAC address to lookup */
+	struct ifnet *dev;
+	const unsigned char *mac_addr;
 	u16 vlan_tag;
 };
 
@@ -85,7 +83,7 @@
 
 struct adap_ports {
 	unsigned int nports;     /* number of ports on this adapter */
-	struct net_device *lldevs[MAX_NPORTS];
+	struct ifnet *lldevs[MAX_NPORTS];
 };
 
 /*
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_adapter.h
--- a/head/sys/dev/cxgb/cxgb_adapter.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/cxgb_adapter.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD: head/sys/dev/cxgb/cxgb_adapter.h 231116 2012-02-07 07:32:39Z np $
+$FreeBSD: head/sys/dev/cxgb/cxgb_adapter.h 237832 2012-06-30 02:11:53Z np $
 
 ***************************************************************************/
 
@@ -46,6 +46,7 @@
 #include <net/if.h>
 #include <net/if_media.h>
 #include <net/if_dl.h>
+#include <netinet/in.h>
 #include <netinet/tcp_lro.h>
 
 #include <machine/bus.h>
@@ -56,7 +57,6 @@
 #include <dev/pci/pcivar.h>
 
 #include <cxgb_osdep.h>
-#include <t3cdev.h>
 #include <sys/mbufq.h>
 
 struct adapter;
@@ -129,6 +129,7 @@
 	CXGB_OFLD_INIT	= (1 << 7),
 	TP_PARITY_INIT	= (1 << 8),
 	CXGB_BUSY	= (1 << 9),
+	TOM_INIT_DONE	= (1 << 10),
 
 	/* port flags */
 	DOOMED		= (1 << 0),
@@ -178,7 +179,6 @@
 	uint32_t        async_notif;
 	uint32_t	cntxt_id;
 	uint32_t        offload_pkts;
-	uint32_t        offload_bundles;
 	uint32_t        pure_rsps;
 	uint32_t        unhandled_irqs;
 	uint32_t        starved;
@@ -265,15 +265,6 @@
 	struct sg_ent  txq_sgl[TX_MAX_SEGS / 2 + 1];
 };
      	
-
-enum {
-	SGE_PSTAT_TSO,              /* # of TSO requests */
-	SGE_PSTAT_RX_CSUM_GOOD,     /* # of successful RX csum offloads */
-	SGE_PSTAT_TX_CSUM,          /* # of TX checksum offloads */
-	SGE_PSTAT_VLANEX,           /* # of VLAN tag extractions */
-	SGE_PSTAT_VLANINS,          /* # of VLAN tag insertions */
-};
-
 #define SGE_PSTAT_MAX (SGE_PSTAT_VLANINS+1)
 
 #define QS_EXITING              0x1
@@ -288,8 +279,8 @@
 	struct lro_state        lro;
 	struct sge_txq		txq[SGE_TXQ_PER_SET];
 	uint32_t                txq_stopped;       /* which Tx queues are stopped */
-	uint64_t                port_stats[SGE_PSTAT_MAX];
 	struct port_info        *port;
+	struct adapter          *adap;
 	int                     idx; /* qset # */
 	int                     qs_flags;
 	int			coalescing;
@@ -306,10 +297,13 @@
 
 struct filter_info;
 
+typedef int (*cpl_handler_t)(struct sge_qset *, struct rsp_desc *,
+    struct mbuf *);
+
 struct adapter {
+	SLIST_ENTRY(adapter)	link;
 	device_t		dev;
 	int			flags;
-	TAILQ_ENTRY(adapter)    adapter_entry;
 
 	/* PCI register resources */
 	int			regs_rid;
@@ -375,11 +369,16 @@
 
 	struct port_info	port[MAX_NPORTS];
 	device_t		portdev[MAX_NPORTS];
-	struct t3cdev           tdev;
+#ifdef TCP_OFFLOAD
+	void 			*tom_softc;
+	void 			*iwarp_softc;
+#endif
 	char                    fw_version[64];
 	char                    port_types[MAX_NPORTS + 1];
 	uint32_t                open_device_map;
-	uint32_t                registered_device_map;
+#ifdef TCP_OFFLOAD
+	int			offload_map;
+#endif
 	struct mtx              lock;
 	driver_intr_t           *cxgb_intr;
 	int                     msi_count;
@@ -391,6 +390,11 @@
 	char                    elmerlockbuf[ADAPTER_LOCK_NAME_LEN];
 
 	int			timestamp;
+
+#ifdef TCP_OFFLOAD
+#define NUM_CPL_HANDLERS	0xa7
+	cpl_handler_t cpl_handler[NUM_CPL_HANDLERS] __aligned(CACHE_LINE_SIZE);
+#endif
 };
 
 struct t3_rx_mode {
@@ -501,10 +505,12 @@
 			int speed, int duplex, int fc, int mac_was_reset);
 void t3_os_phymod_changed(struct adapter *adap, int port_id);
 void t3_sge_err_intr_handler(adapter_t *adapter);
-int t3_offload_tx(struct t3cdev *, struct mbuf *);
+#ifdef TCP_OFFLOAD
+int t3_offload_tx(struct adapter *, struct mbuf *);
+#endif
 void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]);
 int t3_mgmt_tx(adapter_t *adap, struct mbuf *m);
-
+int t3_register_cpl_handler(struct adapter *, int, cpl_handler_t);
 
 int t3_sge_alloc(struct adapter *);
 int t3_sge_free(struct adapter *);
@@ -522,7 +528,7 @@
 int t3_sge_init_port(struct port_info *);
 void t3_free_tx_desc(struct sge_qset *qs, int n, int qid);
 
-void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
+void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad);
 
 void t3_add_attach_sysctls(adapter_t *sc);
 void t3_add_configured_sysctls(adapter_t *sc);
@@ -555,15 +561,9 @@
 	return container_of(q, struct sge_qset, txq[qidx]);
 }
 
-static __inline struct adapter *
-tdev2adap(struct t3cdev *d)
-{
-	return container_of(d, struct adapter, tdev);
-}
-
 #undef container_of
 
-#define OFFLOAD_DEVMAP_BIT 15
+#define OFFLOAD_DEVMAP_BIT (1 << MAX_NPORTS)
 static inline int offload_running(adapter_t *adapter)
 {
         return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
@@ -572,4 +572,5 @@
 void cxgb_tx_watchdog(void *arg);
 int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
 void cxgb_qflush(struct ifnet *ifp);
+void t3_iterate(void (*)(struct adapter *, void *), void *);
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_main.c
--- a/head/sys/dev/cxgb/cxgb_main.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/cxgb_main.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,7 +28,9 @@
 ***************************************************************************/
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 231317 2012-02-09 23:19:09Z np $");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 237832 2012-06-30 02:11:53Z np $");
+
+#include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -107,6 +109,9 @@
     unsigned int, u64, u64);
 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
     unsigned int, u64, u64);
+#ifdef TCP_OFFLOAD
+static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+#endif
 
 /* Attachment glue for the PCI controller end of the device.  Each port of
  * the device is attached separately, as defined later.
@@ -119,10 +124,11 @@
     unsigned int end);
 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
 static int cxgb_get_regs_len(void);
-static int offload_open(struct port_info *pi);
 static void touch_bars(device_t dev);
-static int offload_close(struct t3cdev *tdev);
 static void cxgb_update_mac_settings(struct port_info *p);
+#ifdef TCP_OFFLOAD
+static int toe_capability(struct port_info *, int);
+#endif
 
 static device_method_t cxgb_controller_methods[] = {
 	DEVMETHOD(device_probe,		cxgb_controller_probe),
@@ -138,8 +144,11 @@
 	sizeof(struct adapter)
 };
 
+static int cxgbc_mod_event(module_t, int, void *);
 static devclass_t	cxgb_controller_devclass;
-DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
+DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
+    cxgbc_mod_event, 0);
+MODULE_VERSION(cxgbc, 1);
 
 /*
  * Attachment glue for the ports.  Attachment is done directly to the
@@ -177,6 +186,14 @@
 
 static devclass_t	cxgb_port_devclass;
 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
+MODULE_VERSION(cxgb, 1);
+
+static struct mtx t3_list_lock;
+static SLIST_HEAD(, adapter) t3_list;
+#ifdef TCP_OFFLOAD
+static struct mtx t3_uld_list_lock;
+static SLIST_HEAD(, uld_info) t3_uld_list;
+#endif
 
 /*
  * The driver uses the best interrupt scheme available on a platform in the
@@ -195,15 +212,6 @@
     "MSI-X, MSI, INTx selector");
 
 /*
- * The driver enables offload as a default.
- * To disable it, use ofld_disable = 1.
- */
-static int ofld_disable = 0;
-TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
-    "disable ULP offload");
-
-/*
  * The driver uses an auto-queue algorithm by default.
  * To disable it and force a single queue-set per port, use multiq = 0
  */
@@ -445,6 +453,25 @@
 	sc->msi_count = 0;
 	ai = cxgb_get_adapter_info(dev);
 
+	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
+	    device_get_unit(dev));
+	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
+
+	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
+	    device_get_unit(dev));
+	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
+	    device_get_unit(dev));
+	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
+	    device_get_unit(dev));
+	
+	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
+	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
+	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
+
+	mtx_lock(&t3_list_lock);
+	SLIST_INSERT_HEAD(&t3_list, sc, link);
+	mtx_unlock(&t3_list_lock);
+
 	/* find the PCIe link width and set max read request to 4KB*/
 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
 		uint16_t lnk;
@@ -471,24 +498,10 @@
 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
 		device_printf(dev, "Cannot allocate BAR region 0\n");
-		return (ENXIO);
+		error = ENXIO;
+		goto out;
 	}
 
-	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
-	    device_get_unit(dev));
-	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
-
-	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
-	    device_get_unit(dev));
-	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
-	    device_get_unit(dev));
-	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
-	    device_get_unit(dev));
-	
-	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
-	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
-	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
-	
 	sc->bt = rman_get_bustag(sc->regs_res);
 	sc->bh = rman_get_bushandle(sc->regs_res);
 	sc->mmio_len = rman_get_size(sc->regs_res);
@@ -604,7 +617,7 @@
 	} else {
 		sc->flags |= TPS_UPTODATE;
 	}
-	
+
 	/*
 	 * Create a child device for each MAC.  The ethernet attachment
 	 * will be done in these children.
@@ -636,12 +649,7 @@
 	t3_sge_init_adapter(sc);
 
 	t3_led_ready(sc);
-	
-	cxgb_offload_init();
-	if (is_offload(sc)) {
-		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
-		cxgb_adapter_ofld(sc);
-        }
+
 	error = t3_get_fw_version(sc, &vers);
 	if (error)
 		goto out;
@@ -662,6 +670,11 @@
 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
 	t3_add_attach_sysctls(sc);
+
+#ifdef TCP_OFFLOAD
+	for (i = 0; i < NUM_CPL_HANDLERS; i++)
+		sc->cpl_handler[i] = cpl_not_handled;
+#endif
 out:
 	if (error)
 		cxgb_free(sc);
@@ -775,20 +788,9 @@
 		sc->tq = NULL;
 	}
 	
-	if (is_offload(sc)) {
-		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
-		cxgb_adapter_unofld(sc);
-	}
-
-#ifdef notyet
-	if (sc->flags & CXGB_OFLD_INIT)
-		cxgb_offload_deactivate(sc);
-#endif
 	free(sc->filters, M_DEVBUF);
 	t3_sge_free(sc);
 
-	cxgb_offload_exit();
-
 	if (sc->udbs_res != NULL)
 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
 		    sc->udbs_res);
@@ -800,6 +802,9 @@
 	MTX_DESTROY(&sc->mdio_lock);
 	MTX_DESTROY(&sc->sge.reg_lock);
 	MTX_DESTROY(&sc->elmer_lock);
+	mtx_lock(&t3_list_lock);
+	SLIST_REMOVE(&t3_list, sc, adapter, link);
+	mtx_unlock(&t3_list_lock);
 	ADAPTER_LOCK_DEINIT(sc);
 }
 
@@ -981,7 +986,7 @@
 
 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
-    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
+    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
 #define CXGB_CAP_ENABLE CXGB_CAP
 
 static int
@@ -1017,8 +1022,13 @@
 	ifp->if_qflush = cxgb_qflush;
 
 	ifp->if_capabilities = CXGB_CAP;
+#ifdef TCP_OFFLOAD
+	if (is_offload(sc))
+		ifp->if_capabilities |= IFCAP_TOE4;
+#endif
 	ifp->if_capenable = CXGB_CAP_ENABLE;
-	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
+	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
+	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
 
 	/*
 	 * Disable TSO on 4-port - it isn't supported by the firmware.
@@ -1420,65 +1430,6 @@
 	              cpus, rspq_map);
 
 }
-
-/*
- * Sends an mbuf to an offload queue driver
- * after dealing with any active network taps.
- */
-static inline int
-offload_tx(struct t3cdev *tdev, struct mbuf *m)
-{
-	int ret;
-
-	ret = t3_offload_tx(tdev, m);
-	return (ret);
-}
-
-static int
-write_smt_entry(struct adapter *adapter, int idx)
-{
-	struct port_info *pi = &adapter->port[idx];
-	struct cpl_smt_write_req *req;
-	struct mbuf *m;
-
-	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
-		return (ENOMEM);
-
-	req = mtod(m, struct cpl_smt_write_req *);
-	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
-	
-	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
-	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
-	req->iff = idx;
-	memset(req->src_mac1, 0, sizeof(req->src_mac1));
-	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
-
-	m_set_priority(m, 1);
-
-	offload_tx(&adapter->tdev, m);
-
-	return (0);
-}
-
-static int
-init_smt(struct adapter *adapter)
-{
-	int i;
-
-	for_each_port(adapter, i)
-		write_smt_entry(adapter, i);
-	return 0;
-}
-
-static void
-init_port_mtus(adapter_t *adapter)
-{
-	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
-
-	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
-}
-
 static void
 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
 			      int hi, int port)
@@ -1705,45 +1656,6 @@
 	t3_intr_disable(sc);
 }
 
-static int
-offload_open(struct port_info *pi)
-{
-	struct adapter *sc = pi->adapter;
-	struct t3cdev *tdev = &sc->tdev;
-
-	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
-
-	t3_tp_set_offload_mode(sc, 1);
-	tdev->lldev = pi->ifp;
-	init_port_mtus(sc);
-	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
-		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
-	init_smt(sc);
-	cxgb_add_clients(tdev);
-
-	return (0);
-}
-
-static int
-offload_close(struct t3cdev *tdev)
-{
-	struct adapter *adapter = tdev2adap(tdev);
-
-	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
-		return (0);
-
-	/* Call back all registered clients */
-	cxgb_remove_clients(tdev);
-
-	tdev->lldev = NULL;
-	cxgb_set_dummy_ops(tdev);
-	t3_tp_set_offload_mode(adapter, 0);
-
-	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
-
-	return (0);
-}
-
 /*
  * if_init for cxgb ports.
  */
@@ -1793,15 +1705,9 @@
 		ADAPTER_UNLOCK(sc);
 	}
 
-	if (sc->open_device_map == 0) {
-		if ((rc = cxgb_up(sc)) != 0)
+	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
 			goto done;
 
-		if (is_offload(sc) && !ofld_disable && offload_open(p))
-			log(LOG_WARNING,
-			    "Could not initialize offload capabilities\n");
-	}
-
 	PORT_LOCK(p);
 	if (isset(&sc->open_device_map, p->port_id) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
@@ -1929,7 +1835,6 @@
 	DELAY(100 * 1000);
 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
 
-
 	pi->phy.ops->power_down(&pi->phy, 1);
 
 	PORT_UNLOCK(pi);
@@ -1937,9 +1842,6 @@
 	pi->link_config.link_ok = 0;
 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
 
-	if ((sc->open_device_map & PORT_MASK) == 0)
-		offload_close(&sc->tdev);
-
 	if (sc->open_device_map == 0)
 		cxgb_down(pi->adapter);
 
@@ -2049,31 +1951,52 @@
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 
-			if (IFCAP_TSO & ifp->if_capenable &&
+			if (IFCAP_TSO4 & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
-				ifp->if_capenable &= ~IFCAP_TSO;
-				ifp->if_hwassist &= ~CSUM_TSO;
+				ifp->if_capenable &= ~IFCAP_TSO4;
 				if_printf(ifp,
-				    "tso disabled due to -txcsum.\n");
+				    "tso4 disabled due to -txcsum.\n");
+			}
+		}
+		if (mask & IFCAP_TXCSUM_IPV6) {
+			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+			if (IFCAP_TSO6 & ifp->if_capenable &&
+			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+				ifp->if_capenable &= ~IFCAP_TSO6;
+				if_printf(ifp,
+				    "tso6 disabled due to -txcsum6.\n");
 			}
 		}
 		if (mask & IFCAP_RXCSUM)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
-		if (mask & IFCAP_TSO) {
-			ifp->if_capenable ^= IFCAP_TSO;
-
-			if (IFCAP_TSO & ifp->if_capenable) {
-				if (IFCAP_TXCSUM & ifp->if_capenable)
-					ifp->if_hwassist |= CSUM_TSO;
-				else {
-					ifp->if_capenable &= ~IFCAP_TSO;
-					ifp->if_hwassist &= ~CSUM_TSO;
-					if_printf(ifp,
-					    "enable txcsum first.\n");
-					error = EAGAIN;
-				}
-			} else
-				ifp->if_hwassist &= ~CSUM_TSO;
+		if (mask & IFCAP_RXCSUM_IPV6)
+			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+
+		/*
+		 * Note that we leave CSUM_TSO alone (it is always set).  The
+		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
+		 * sending a TSO request our way, so it's sufficient to toggle
+		 * IFCAP_TSOx only.
+		 */
+		if (mask & IFCAP_TSO4) {
+			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
+			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
+				if_printf(ifp, "enable txcsum first.\n");
+				error = EAGAIN;
+				goto fail;
+			}
+			ifp->if_capenable ^= IFCAP_TSO4;
+		}
+		if (mask & IFCAP_TSO6) {
+			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
+			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+				if_printf(ifp, "enable txcsum6 first.\n");
+				error = EAGAIN;
+				goto fail;
+			}
+			ifp->if_capenable ^= IFCAP_TSO6;
 		}
 		if (mask & IFCAP_LRO) {
 			ifp->if_capenable ^= IFCAP_LRO;
@@ -2081,6 +2004,15 @@
 			/* Safe to do this even if cxgb_up not called yet */
 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
 		}
+#ifdef TCP_OFFLOAD
+		if (mask & IFCAP_TOE4) {
+			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
+
+			error = toe_capability(p, enable);
+			if (error == 0)
+				ifp->if_capenable ^= mask;
+		}
+#endif
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
@@ -3362,3 +3294,235 @@
 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
 	mk_set_tcb_field(req, tid, word, mask, val);
 }
+
+void
+t3_iterate(void (*func)(struct adapter *, void *), void *arg)
+{
+	struct adapter *sc;
+
+	mtx_lock(&t3_list_lock);
+	SLIST_FOREACH(sc, &t3_list, link) {
+		/*
+		 * func should not make any assumptions about what state sc is
+		 * in - the only guarantee is that sc->sc_lock is a valid lock.
+		 */
+		func(sc, arg);
+	}
+	mtx_unlock(&t3_list_lock);
+}
+
+#ifdef TCP_OFFLOAD
+static int
+toe_capability(struct port_info *pi, int enable)
+{
+	int rc;
+	struct adapter *sc = pi->adapter;
+
+	ADAPTER_LOCK_ASSERT_OWNED(sc);
+
+	if (!is_offload(sc))
+		return (ENODEV);
+
+	if (enable) {
+		if (!(sc->flags & FULL_INIT_DONE)) {
+			log(LOG_WARNING,
+			    "You must enable a cxgb interface first\n");
+			return (EAGAIN);
+		}
+
+		if (isset(&sc->offload_map, pi->port_id))
+			return (0);
+
+		if (!(sc->flags & TOM_INIT_DONE)) {
+			rc = t3_activate_uld(sc, ULD_TOM);
+			if (rc == EAGAIN) {
+				log(LOG_WARNING,
+				    "You must kldload t3_tom.ko before trying "
+				    "to enable TOE on a cxgb interface.\n");
+			}
+			if (rc != 0)
+				return (rc);
+			KASSERT(sc->tom_softc != NULL,
+			    ("%s: TOM activated but softc NULL", __func__));
+			KASSERT(sc->flags & TOM_INIT_DONE,
+			    ("%s: TOM activated but flag not set", __func__));
+		}
+
+		setbit(&sc->offload_map, pi->port_id);
+
+		/*
+		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
+		 * enabled on any port.  Need to figure out how to enable,
+		 * disable, load, and unload iWARP cleanly.
+		 */
+		if (!isset(&sc->offload_map, MAX_NPORTS) &&
+		    t3_activate_uld(sc, ULD_IWARP) == 0)
+			setbit(&sc->offload_map, MAX_NPORTS);
+	} else {
+		if (!isset(&sc->offload_map, pi->port_id))
+			return (0);
+
+		KASSERT(sc->flags & TOM_INIT_DONE,
+		    ("%s: TOM never initialized?", __func__));
+		clrbit(&sc->offload_map, pi->port_id);
+	}
+
+	return (0);
+}
+
+/*
+ * Add an upper layer driver to the global list.
+ */
+int
+t3_register_uld(struct uld_info *ui)
+{
+	int rc = 0;
+	struct uld_info *u;
+
+	mtx_lock(&t3_uld_list_lock);
+	SLIST_FOREACH(u, &t3_uld_list, link) {
+	    if (u->uld_id == ui->uld_id) {
+		    rc = EEXIST;
+		    goto done;
+	    }
+	}
+
+	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
+	ui->refcount = 0;
+done:
+	mtx_unlock(&t3_uld_list_lock);
+	return (rc);
+}
+
+int
+t3_unregister_uld(struct uld_info *ui)
+{
+	int rc = EINVAL;
+	struct uld_info *u;
+
+	mtx_lock(&t3_uld_list_lock);
+
+	SLIST_FOREACH(u, &t3_uld_list, link) {
+	    if (u == ui) {
+		    if (ui->refcount > 0) {
+			    rc = EBUSY;
+			    goto done;
+		    }
+
+		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
+		    rc = 0;
+		    goto done;
+	    }
+	}
+done:
+	mtx_unlock(&t3_uld_list_lock);
+	return (rc);
+}
+
+int
+t3_activate_uld(struct adapter *sc, int id)
+{
+	int rc = EAGAIN;
+	struct uld_info *ui;
+
+	mtx_lock(&t3_uld_list_lock);
+
+	SLIST_FOREACH(ui, &t3_uld_list, link) {
+		if (ui->uld_id == id) {
+			rc = ui->activate(sc);
+			if (rc == 0)
+				ui->refcount++;
+			goto done;
+		}
+	}
+done:
+	mtx_unlock(&t3_uld_list_lock);
+
+	return (rc);
+}
+
+int
+t3_deactivate_uld(struct adapter *sc, int id)
+{
+	int rc = EINVAL;
+	struct uld_info *ui;
+
+	mtx_lock(&t3_uld_list_lock);
+
+	SLIST_FOREACH(ui, &t3_uld_list, link) {
+		if (ui->uld_id == id) {
+			rc = ui->deactivate(sc);
+			if (rc == 0)
+				ui->refcount--;
+			goto done;
+		}
+	}
+done:
+	mtx_unlock(&t3_uld_list_lock);
+
+	return (rc);
+}
+
+static int
+cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
+    struct mbuf *m)
+{
+	m_freem(m);
+	return (EDOOFUS);
+}
+
+int
+t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
+{
+	uintptr_t *loc, new;
+
+	if (opcode >= NUM_CPL_HANDLERS)
+		return (EINVAL);
+
+	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
+	loc = (uintptr_t *) &sc->cpl_handler[opcode];
+	atomic_store_rel_ptr(loc, new);
+
+	return (0);
+}
+#endif
+
+static int
+cxgbc_mod_event(module_t mod, int cmd, void *arg)
+{
+	int rc = 0;
+
+	switch (cmd) {
+	case MOD_LOAD:
+		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
+		SLIST_INIT(&t3_list);
+#ifdef TCP_OFFLOAD
+		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
+		SLIST_INIT(&t3_uld_list);
+#endif
+		break;
+
+	case MOD_UNLOAD:
+#ifdef TCP_OFFLOAD
+		mtx_lock(&t3_uld_list_lock);
+		if (!SLIST_EMPTY(&t3_uld_list)) {
+			rc = EBUSY;
+			mtx_unlock(&t3_uld_list_lock);
+			break;
+		}
+		mtx_unlock(&t3_uld_list_lock);
+		mtx_destroy(&t3_uld_list_lock);
+#endif
+		mtx_lock(&t3_list_lock);
+		if (!SLIST_EMPTY(&t3_list)) {
+			rc = EBUSY;
+			mtx_unlock(&t3_list_lock);
+			break;
+		}
+		mtx_unlock(&t3_list_lock);
+		mtx_destroy(&t3_list_lock);
+		break;
+	}
+
+	return (rc);
+}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_offload.c
--- a/head/sys/dev/cxgb/cxgb_offload.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,465 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-***************************************************************************/
-
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <sys/module.h>
-#include <sys/pciio.h>
-#include <sys/conf.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/bus_dma.h>
-#include <sys/rman.h>
-#include <sys/ioccom.h>
-#include <sys/mbuf.h>
-#include <sys/linker.h>
-#include <sys/firmware.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
-#include <sys/proc.h>
-
-#include <cxgb_include.h>
-
-#include <net/route.h>
-
-#define VALIDATE_TID 0
-MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio 10 Gigabit Ethernet and services");
-
-TAILQ_HEAD(, cxgb_client) client_list;
-TAILQ_HEAD(, t3cdev) ofld_dev_list;
-
-
-static struct mtx cxgb_db_lock;
-
-
-static int inited = 0;
-
-static inline int
-offload_activated(struct t3cdev *tdev)
-{
-	struct adapter *adapter = tdev2adap(tdev);
-	
-	return (isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT));
-}
-
-static inline void
-register_tdev(struct t3cdev *tdev)
-{
-	static int unit;
-
-	mtx_lock(&cxgb_db_lock);
-	snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++);
-	TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry);
-	mtx_unlock(&cxgb_db_lock);
-}
-
-static inline void
-unregister_tdev(struct t3cdev *tdev)
-{
-	if (!inited)
-		return;
-
-	mtx_lock(&cxgb_db_lock);
-	TAILQ_REMOVE(&ofld_dev_list, tdev, entry);
-	mtx_unlock(&cxgb_db_lock);	
-}
-
-#ifndef TCP_OFFLOAD_DISABLE
-/**
- *	cxgb_register_client - register an offload client
- *	@client: the client
- *
- *	Add the client to the client list,
- *	and call backs the client for each activated offload device
- */
-void
-cxgb_register_client(struct cxgb_client *client)
-{
-	struct t3cdev *tdev;
-
-	mtx_lock(&cxgb_db_lock);
-	TAILQ_INSERT_TAIL(&client_list, client, client_entry);
-
-	if (client->add) {
-		TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
-			if (offload_activated(tdev)) {
-				client->add(tdev);
-			} else
-				CTR1(KTR_CXGB,
-				    "cxgb_register_client: %p not activated", tdev);
-			
-		}
-	}
-	mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- *	cxgb_unregister_client - unregister an offload client
- *	@client: the client
- *
- *	Remove the client to the client list,
- *	and call backs the client for each activated offload device.
- */
-void
-cxgb_unregister_client(struct cxgb_client *client)
-{
-	struct t3cdev *tdev;
-
-	mtx_lock(&cxgb_db_lock);
-	TAILQ_REMOVE(&client_list, client, client_entry);
-
-	if (client->remove) {
-		TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
-			if (offload_activated(tdev))
-				client->remove(tdev);
-		}
-	}
-	mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- *	cxgb_add_clients - activate register clients for an offload device
- *	@tdev: the offload device
- *
- *	Call backs all registered clients once a offload device is activated 
- */
-void
-cxgb_add_clients(struct t3cdev *tdev)
-{
-	struct cxgb_client *client;
-
-	mtx_lock(&cxgb_db_lock);
-	TAILQ_FOREACH(client, &client_list, client_entry) {
-		if (client->add)
-			client->add(tdev);
-	}
-	mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- *	cxgb_remove_clients - activate register clients for an offload device
- *	@tdev: the offload device
- *
- *	Call backs all registered clients once a offload device is deactivated 
- */
-void
-cxgb_remove_clients(struct t3cdev *tdev)
-{
-	struct cxgb_client *client;
-
-	mtx_lock(&cxgb_db_lock);
-	TAILQ_FOREACH(client, &client_list, client_entry) {
-		if (client->remove)
-			client->remove(tdev);
-	}
-	mtx_unlock(&cxgb_db_lock);
-}
-#endif
-
-/**
- * cxgb_ofld_recv - process n received offload packets
- * @dev: the offload device
- * @m: an array of offload packets
- * @n: the number of offload packets
- *
- * Process an array of ingress offload packets.  Each packet is forwarded
- * to any active network taps and then passed to the offload device's receive
- * method.  We optimize passing packets to the receive method by passing
- * it the whole array at once except when there are active taps.
- */
-int
-cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n)
-{
-
-	return dev->recv(dev, m, n);
-}
-
-/*
- * Dummy handler for Rx offload packets in case we get an offload packet before
- * proper processing is setup.  This complains and drops the packet as it isn't
- * normal to get offload packets at this stage.
- */
-static int
-rx_offload_blackhole(struct t3cdev *dev, struct mbuf **m, int n)
-{
-	while (n--)
-		m_freem(m[n]);
-	return 0;
-}
-
-static void
-dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr,
-    struct sockaddr *sa)
-{
-}
-
-void
-cxgb_set_dummy_ops(struct t3cdev *dev)
-{
-	dev->recv         = rx_offload_blackhole;
-	dev->arp_update = dummy_neigh_update;
-}
-
-static int
-do_smt_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_smt_write_rpl *rpl = cplhdr(m);
-
-	if (rpl->status != CPL_ERR_NONE)
-		log(LOG_ERR,
-		       "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
-		       rpl->status, GET_TID(rpl));
-
-	return CPL_RET_BUF_DONE;
-}
-
-static int
-do_l2t_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_l2t_write_rpl *rpl = cplhdr(m);
-
-	if (rpl->status != CPL_ERR_NONE)
-		log(LOG_ERR,
-		       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
-		       rpl->status, GET_TID(rpl));
-
-	return CPL_RET_BUF_DONE;
-}
-
-static int
-do_rte_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_rte_write_rpl *rpl = cplhdr(m);
-
-	if (rpl->status != CPL_ERR_NONE)
-		log(LOG_ERR,
-		       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
-		       rpl->status, GET_TID(rpl));
-
-	return CPL_RET_BUF_DONE;
-}
-
-static int
-do_set_tcb_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_set_tcb_rpl *rpl = cplhdr(m);
-
-	if (rpl->status != CPL_ERR_NONE)
-		log(LOG_ERR,
-		    "Unexpected SET_TCB_RPL status %u for tid %u\n",
-			rpl->status, GET_TID(rpl));
-	return CPL_RET_BUF_DONE;
-}
-
-static int
-do_trace(struct t3cdev *dev, struct mbuf *m)
-{
-#if 0
-	struct cpl_trace_pkt *p = cplhdr(m);
-
-
-	skb->protocol = 0xffff;
-	skb->dev = dev->lldev;
-	skb_pull(skb, sizeof(*p));
-	skb->mac.raw = mtod(m, (char *));
-	netif_receive_skb(skb);
-#endif	
-	return 0;
-}
-
-/*
- * Process a received packet with an unknown/unexpected CPL opcode.
- */
-static int
-do_bad_cpl(struct t3cdev *dev, struct mbuf *m)
-{
-	log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name,
-	    0xFF & *mtod(m, uint32_t *));
-	return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
-}
-
-/*
- * Handlers for each CPL opcode
- */
-static cpl_handler_func cpl_handlers[256];
-
-/*
- * T3CDEV's receive method.
- */
-int
-process_rx(struct t3cdev *dev, struct mbuf **m, int n)
-{
-	while (n--) {
-		struct mbuf *m0 = *m++;
-		unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data));
-		int ret;
-
-		DPRINTF("processing op=0x%x m=%p data=%p\n", opcode, m0, m0->m_data);
-		
-		ret = cpl_handlers[opcode] (dev, m0);
-
-#if VALIDATE_TID
-		if (ret & CPL_RET_UNKNOWN_TID) {
-			union opcode_tid *p = cplhdr(m0);
-
-			log(LOG_ERR, "%s: CPL message (opcode %u) had "
-			       "unknown TID %u\n", dev->name, opcode,
-			       G_TID(ntohl(p->opcode_tid)));
-		}
-#endif
-		if (ret & CPL_RET_BUF_DONE)
-			m_freem(m0);
-	}
-	return 0;
-}
-
-/*
- * Add a new handler to the CPL dispatch table.  A NULL handler may be supplied
- * to unregister an existing handler.
- */
-void
-t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h)
-{
-	if (opcode < NUM_CPL_CMDS)
-		cpl_handlers[opcode] = h ? h : do_bad_cpl;
-	else
-		log(LOG_ERR, "T3C: handler registration for "
-		       "opcode %x failed\n", opcode);
-}
-
-/*
- * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
- * The allocated memory is cleared.
- */
-void *
-cxgb_alloc_mem(unsigned long size)
-{
-
-	return malloc(size, M_CXGB, M_ZERO|M_NOWAIT);
-}
-
-/*
- * Free memory allocated through t3_alloc_mem().
- */
-void
-cxgb_free_mem(void *addr)
-{
-	free(addr, M_CXGB);
-}
-
-static __inline int
-adap2type(struct adapter *adapter) 
-{ 
-        int type = 0; 
- 
-        switch (adapter->params.rev) { 
-        case T3_REV_A: 
-                type = T3A; 
-                break; 
-        case T3_REV_B: 
-        case T3_REV_B2: 
-                type = T3B; 
-                break; 
-        case T3_REV_C: 
-                type = T3C; 
-                break; 
-        } 
-        return type; 
-}
-
-void
-cxgb_adapter_ofld(struct adapter *adapter)
-{
-	struct t3cdev *tdev = &adapter->tdev;
-
-	cxgb_set_dummy_ops(tdev);
-	tdev->type = adap2type(adapter);
-	tdev->adapter = adapter;
-	register_tdev(tdev);	
-
-}
-
-void
-cxgb_adapter_unofld(struct adapter *adapter)
-{
-	struct t3cdev *tdev = &adapter->tdev;
-
-	tdev->recv = NULL;
-	tdev->arp_update = NULL;
-	unregister_tdev(tdev);	
-}
-
-void
-cxgb_offload_init(void)
-{
-	int i;
-
-	if (inited++)
-		return;
-	
-	mtx_init(&cxgb_db_lock, "ofld db", NULL, MTX_DEF);
-
-	TAILQ_INIT(&client_list);
-	TAILQ_INIT(&ofld_dev_list);
-	
-	for (i = 0; i < 0x100; ++i)
-		cpl_handlers[i] = do_bad_cpl;
-	
-	t3_register_cpl_handler(CPL_SMT_WRITE_RPL, do_smt_write_rpl);
-	t3_register_cpl_handler(CPL_RTE_WRITE_RPL, do_rte_write_rpl);
-	t3_register_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
-
-	t3_register_cpl_handler(CPL_SET_TCB_RPL, do_set_tcb_rpl);
-	t3_register_cpl_handler(CPL_TRACE_PKT, do_trace);
-	
-}
-
-void 
-cxgb_offload_exit(void)
-{
-
-	if (--inited)
-		return;
-
-	mtx_destroy(&cxgb_db_lock);
-}
-
-MODULE_VERSION(if_cxgb, 1);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_offload.h
--- a/head/sys/dev/cxgb/cxgb_offload.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/cxgb_offload.h	Wed Jul 25 17:04:43 2012 +0300
@@ -1,4 +1,3 @@
-
 /**************************************************************************
 
 Copyright (c) 2007-2008, Chelsio Inc.
@@ -26,228 +25,100 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/cxgb_offload.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 
 #ifndef _CXGB_OFFLOAD_H
 #define _CXGB_OFFLOAD_H
 
-#include <common/cxgb_tcb.h>
-#include <t3cdev.h>
-
-MALLOC_DECLARE(M_CXGB);
+#ifdef TCP_OFFLOAD
+enum {
+	ULD_TOM = 1,
+	ULD_IWARP = 2,
+};
 
 struct adapter;
-struct cxgb_client;
-
-void cxgb_offload_init(void);
-void cxgb_offload_exit(void);
-
-void cxgb_adapter_ofld(struct adapter *adapter);
-void cxgb_adapter_unofld(struct adapter *adapter);
-int cxgb_offload_activate(struct adapter *adapter);
-void cxgb_offload_deactivate(struct adapter *adapter);
-int cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n);
-
-void cxgb_set_dummy_ops(struct t3cdev *dev);
-
-
-/*
- * Client registration.  Users of T3 driver must register themselves.
- * The T3 driver will call the add function of every client for each T3
- * adapter activated, passing up the t3cdev ptr.  Each client fills out an
- * array of callback functions to process CPL messages.
- */
-
-void cxgb_register_client(struct cxgb_client *client);
-void cxgb_unregister_client(struct cxgb_client *client);
-void cxgb_add_clients(struct t3cdev *tdev);
-void cxgb_remove_clients(struct t3cdev *tdev);
-
-typedef int (*cxgb_cpl_handler_func)(struct t3cdev *dev,
-				      struct mbuf *m, void *ctx);
-
-struct l2t_entry;
-struct cxgb_client {
-	char 			*name;
-	void 			(*add) (struct t3cdev *);
-	void 			(*remove) (struct t3cdev *);
-	cxgb_cpl_handler_func 	*handlers;
-	int			(*redirect)(void *ctx, struct rtentry *old,
-					    struct rtentry *new,
-					    struct l2t_entry *l2t);
-	TAILQ_ENTRY(cxgb_client)         client_entry;
+struct uld_info {
+	SLIST_ENTRY(uld_info) link;
+	int refcount;
+	int uld_id;
+	int (*activate)(struct adapter *);
+	int (*deactivate)(struct adapter *);
 };
 
-/*
- * TID allocation services.
- */
-int cxgb_alloc_atid(struct t3cdev *dev, struct cxgb_client *client,
-		     void *ctx);
-int cxgb_alloc_stid(struct t3cdev *dev, struct cxgb_client *client,
-		     void *ctx);
-void *cxgb_free_atid(struct t3cdev *dev, int atid);
-void cxgb_free_stid(struct t3cdev *dev, int stid);
-void *cxgb_get_lctx(struct t3cdev *tdev, int stid);
-void cxgb_insert_tid(struct t3cdev *dev, struct cxgb_client *client,
-		      void *ctx,
-	unsigned int tid);
-void cxgb_queue_tid_release(struct t3cdev *dev, unsigned int tid);
-void cxgb_remove_tid(struct t3cdev *dev, void *ctx, unsigned int tid);
-
-struct toe_tid_entry {
-	struct cxgb_client 	*client;
-	void 			*ctx;
+struct tom_tunables {
+	int sndbuf;
+	int ddp;
+	int indsz;
+	int ddp_thres;
 };
 
 /* CPL message priority levels */
 enum {
 	CPL_PRIORITY_DATA = 0,     /* data messages */
-	CPL_PRIORITY_SETUP = 1,	   /* connection setup messages */
-	CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */
-	CPL_PRIORITY_LISTEN = 1,   /* listen start/stop messages */
-	CPL_PRIORITY_ACK = 1,      /* RX ACK messages */
 	CPL_PRIORITY_CONTROL = 1   /* offload control messages */
 };
 
-/* Flags for return value of CPL message handlers */
-enum {
-	CPL_RET_BUF_DONE = 1,   // buffer processing done, buffer may be freed
-	CPL_RET_BAD_MSG = 2,    // bad CPL message (e.g., unknown opcode)
-	CPL_RET_UNKNOWN_TID = 4	// unexpected unknown TID
-};
+#define S_HDR_NDESC	0
+#define M_HDR_NDESC	0xf
+#define V_HDR_NDESC(x)	((x) << S_HDR_NDESC)
+#define G_HDR_NDESC(x)	(((x) >> S_HDR_NDESC) & M_HDR_NDESC)
 
-typedef int (*cpl_handler_func)(struct t3cdev *dev, struct mbuf *m);
+#define S_HDR_QSET	4
+#define M_HDR_QSET	0xf
+#define V_HDR_QSET(x)	((x) << S_HDR_QSET)
+#define G_HDR_QSET(x)	(((x) >> S_HDR_QSET) & M_HDR_QSET)
 
-/*
- * Returns a pointer to the first byte of the CPL header in an sk_buff that
- * contains a CPL message.
- */
-static inline void *cplhdr(struct mbuf *m)
+#define S_HDR_CTRL	8
+#define V_HDR_CTRL(x)	((x) << S_HDR_CTRL)
+#define F_HDR_CTRL	V_HDR_CTRL(1U)
+
+#define S_HDR_DF	9
+#define V_HDR_DF(x)	((x) << S_HDR_DF)
+#define F_HDR_DF	V_HDR_DF(1U)
+
+#define S_HDR_SGL	10
+#define V_HDR_SGL(x)	((x) << S_HDR_SGL)
+#define F_HDR_SGL	V_HDR_SGL(1U)
+
+struct ofld_hdr
 {
-	return mtod(m, uint8_t *);
-}
-
-void t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h);
-
-union listen_entry {
-	struct toe_tid_entry toe_tid;
-	union listen_entry *next;
-};
-
-union active_open_entry {
-	struct toe_tid_entry toe_tid;
-	union active_open_entry *next;
+	void *sgl;	/* SGL, if F_HDR_SGL set in flags */
+	int plen;	/* amount of payload (in bytes) */
+	int flags;
 };
 
 /*
- * Holds the size, base address, free list start, etc of the TID, server TID,
- * and active-open TID tables for a offload device.
- * The tables themselves are allocated dynamically.
+ * Convenience function for fixed size CPLs that fit in 1 desc.
  */
-struct tid_info {
-	struct toe_tid_entry *tid_tab;
-	unsigned int ntids;
-	volatile unsigned int tids_in_use;
+#define M_GETHDR_OFLD(qset, ctrl, cpl) \
+    m_gethdr_ofld(qset, ctrl, sizeof(*cpl), (void **)&cpl)
+static inline struct mbuf *
+m_gethdr_ofld(int qset, int ctrl, int cpllen, void **cpl)
+{
+	struct mbuf *m;
+	struct ofld_hdr *oh;
 
-	union listen_entry *stid_tab;
-	unsigned int nstids;
-	unsigned int stid_base;
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL)
+		return (NULL);
 
-	union active_open_entry *atid_tab;
-	unsigned int natids;
-	unsigned int atid_base;
+	oh = mtod(m, struct ofld_hdr *);
+	oh->flags = V_HDR_NDESC(1) | V_HDR_QSET(qset) | V_HDR_CTRL(ctrl);
+	*cpl = (void *)(oh + 1);
+	m->m_pkthdr.len = m->m_len = sizeof(*oh) + cpllen;
 
-	/*
-	 * The following members are accessed R/W so we put them in their own
-	 * cache lines.
-	 *
-	 * XXX We could combine the atid fields above with the lock here since
-	 * atids are use once (unlike other tids).  OTOH the above fields are
-	 * usually in cache due to tid_tab.
-	 */
-	struct mtx atid_lock /* ____cacheline_aligned_in_smp */;
-	union active_open_entry *afree;
-	unsigned int atids_in_use;
-
-	struct mtx stid_lock /*____cacheline_aligned */;
-	union listen_entry *sfree;
-	unsigned int stids_in_use;
-};
-
-struct t3c_data {
-	struct t3cdev *dev;
-	unsigned int tx_max_chunk;  /* max payload for TX_DATA */
-	unsigned int max_wrs;       /* max in-flight WRs per connection */
-	unsigned int nmtus;
-	const unsigned short *mtus;
-	struct tid_info tid_maps;
-
-	struct toe_tid_entry *tid_release_list;
-	struct mtx tid_release_lock;
-	struct task tid_release_task;
-};
-
-/*
- * t3cdev -> toe_data accessor
- */
-#define T3C_DATA(dev) (*(struct t3c_data **)&(dev)->l4opt)
-
-/*
- * Map an ATID or STID to their entries in the corresponding TID tables.
- */
-static inline union active_open_entry *atid2entry(const struct tid_info *t,
-                                                  unsigned int atid)
-{
-        return &t->atid_tab[atid - t->atid_base];
+	return (m);
 }
 
+int t3_register_uld(struct uld_info *);
+int t3_unregister_uld(struct uld_info *);
+int t3_activate_uld(struct adapter *, int);
+int t3_deactivate_uld(struct adapter *, int);
+#endif	/* TCP_OFFLOAD */
 
-static inline union listen_entry *stid2entry(const struct tid_info *t,
-                                             unsigned int stid)
-{
-        return &t->stid_tab[stid - t->stid_base];
-}
+#define CXGB_UNIMPLEMENTED() \
+    panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
 
-/*
- * Find the connection corresponding to a TID.
- */
-static inline struct toe_tid_entry *lookup_tid(const struct tid_info *t,
-                                               unsigned int tid)
-{
-        return tid < t->ntids ? &(t->tid_tab[tid]) : NULL;
-}
-
-/*
- * Find the connection corresponding to a server TID.
- */
-static inline struct toe_tid_entry *lookup_stid(const struct tid_info *t,
-                                                unsigned int tid)
-{
-        if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
-                return NULL;
-        return &(stid2entry(t, tid)->toe_tid);
-}
-
-/*
- * Find the connection corresponding to an active-open TID.
- */
-static inline struct toe_tid_entry *lookup_atid(const struct tid_info *t,
-                                                unsigned int tid)
-{
-        if (tid < t->atid_base || tid >= t->atid_base + t->natids)
-                return NULL;
-        return &(atid2entry(t, tid)->toe_tid);
-}
-
-void *cxgb_alloc_mem(unsigned long size);
-void cxgb_free_mem(void *addr);
-void cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
-void cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa);
-int process_rx(struct t3cdev *dev, struct mbuf **m, int n);
-int attach_t3cdev(struct t3cdev *dev);
-void detach_t3cdev(struct t3cdev *dev);
-
-#define CXGB_UNIMPLEMENTED() panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_osdep.h
--- a/head/sys/dev/cxgb/cxgb_osdep.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/cxgb_osdep.h	Wed Jul 25 17:04:43 2012 +0300
@@ -26,7 +26,7 @@
 POSSIBILITY OF SUCH DAMAGE.
 
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/cxgb_osdep.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 
@@ -67,27 +67,6 @@
 } while (0)
 #endif
 
-#define m_get_priority(m) ((uintptr_t)(m)->m_pkthdr.rcvif)
-#define m_set_priority(m, pri) ((m)->m_pkthdr.rcvif = (struct ifnet *)((uintptr_t)pri))
-#define m_set_sgl(m, sgl) ((m)->m_pkthdr.header = (sgl))
-#define m_get_sgl(m) ((bus_dma_segment_t *)(m)->m_pkthdr.header)
-#define m_set_sgllen(m, len) ((m)->m_pkthdr.ether_vtag = len)
-#define m_get_sgllen(m) ((m)->m_pkthdr.ether_vtag)
-
-/*
- * XXX FIXME
- */
-#define m_set_toep(m, a) ((m)->m_pkthdr.header = (a))
-#define m_get_toep(m) ((m)->m_pkthdr.header)
-#define m_set_handler(m, handler) ((m)->m_pkthdr.header = (handler))
-
-#define m_set_socket(m, a) ((m)->m_pkthdr.header = (a))
-#define m_get_socket(m) ((m)->m_pkthdr.header)
-
-#define	KTR_CXGB	KTR_SPARE2
-
-#define MT_DONTFREE  128
-
 #if __FreeBSD_version < 800054
 #if defined (__GNUC__)
   #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
@@ -123,13 +102,6 @@
 
 #define CXGB_TX_CLEANUP_THRESHOLD        32
 
-
-#ifdef DEBUG_PRINT
-#define DPRINTF printf
-#else 
-#define DPRINTF(...)
-#endif
-
 #define TX_MAX_SIZE                (1 << 16)    /* 64KB                          */
 #define TX_MAX_SEGS                      36     /* maximum supported by card     */
 
@@ -199,7 +171,6 @@
 #define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | (1<<bit)), ((*(p)) & ~(1<<bit)))
 
 #define max_t(type, a, b) (type)max((a), (b))
-#define net_device ifnet
 #define cpu_to_be32            htobe32
 
 /* Standard PHY definitions */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/cxgb_sge.c
--- a/head/sys/dev/cxgb/cxgb_sge.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/cxgb_sge.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,8 +28,9 @@
 ***************************************************************************/
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 232854 2012-03-12 08:03:51Z scottl $");
-
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 237832 2012-06-30 02:11:53Z np $");
+
+#include "opt_inet6.h"
 #include "opt_inet.h"
 
 #include <sys/param.h>
@@ -53,6 +54,7 @@
 #include <sys/systm.h>
 #include <sys/syslog.h>
 #include <sys/socket.h>
+#include <sys/sglist.h>
 
 #include <net/bpf.h>	
 #include <net/ethernet.h>
@@ -77,6 +79,10 @@
 int	txq_fills = 0;
 int	multiq_tx_enable = 1;
 
+#ifdef TCP_OFFLOAD
+CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
+#endif
+
 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
@@ -470,10 +476,17 @@
 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
 {
 
-	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
+	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
+		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
+		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
+	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
+		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
+		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
+	} else
+		m->m_len = IMMED_PKT_SIZE;
 	m->m_ext.ext_buf = NULL;
 	m->m_ext.ext_type = 0;
-	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
+	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
 	return (0);	
 }
 
@@ -702,7 +715,8 @@
 	cb_arg.error = 0;
 	while (n--) {
 		/*
-		 * We only allocate a cluster, mbuf allocation happens after rx
+		 * We allocate an uninitialized mbuf + cluster, mbuf is
+		 * initialized after rx.
 		 */
 		if (q->zone == zone_pack) {
 			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
@@ -1169,57 +1183,6 @@
 	return flits_to_desc(flits);
 }
 
-static unsigned int
-busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
-    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
-{
-	struct mbuf *m0;
-	int err, pktlen, pass = 0;
-	bus_dma_tag_t tag = txq->entry_tag;
-
-retry:
-	err = 0;
-	m0 = *m;
-	pktlen = m0->m_pkthdr.len;
-#if defined(__i386__) || defined(__amd64__)
-	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
-		goto done;
-	} else
-#endif
-		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
-
-	if (err == 0) {
-		goto done;
-	}
-	if (err == EFBIG && pass == 0) {
-		pass = 1;
-		/* Too many segments, try to defrag */
-		m0 = m_defrag(m0, M_DONTWAIT);
-		if (m0 == NULL) {
-			m_freem(*m);
-			*m = NULL;
-			return (ENOBUFS);
-		}
-		*m = m0;
-		goto retry;
-	} else if (err == ENOMEM) {
-		return (err);
-	} if (err) {
-		if (cxgb_debug)
-			printf("map failure err=%d pktlen=%d\n", err, pktlen);
-		m_freem(m0);
-		*m = NULL;
-		return (err);
-	}
-done:
-#if !defined(__i386__) && !defined(__amd64__)
-	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
-#endif	
-	txsd->flags |= TX_SW_DESC_MAPPED;
-
-	return (0);
-}
-
 /**
  *	make_sgl - populate a scatter/gather list for a packet
  *	@sgp: the SGL to populate
@@ -1327,10 +1290,10 @@
 	
 	if (__predict_true(ndesc == 1)) {
 		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
-			V_WR_SGLSFLT(flits)) | wr_hi,
-		    htonl(V_WR_LEN(flits + sgl_flits) |
-			V_WR_GEN(txqs->gen)) | wr_lo);
-		/* XXX gen? */
+		    V_WR_SGLSFLT(flits)) | wr_hi,
+		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
+		    wr_lo);
+
 		wr_gen2(txd, txqs->gen);
 		
 	} else {
@@ -1469,7 +1432,8 @@
 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 			if (__predict_false(!(cflags & CSUM_IP)))
 				cntrl |= F_TXPKT_IPCSUM_DIS;
-			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
+			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
+			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 				cntrl |= F_TXPKT_L4CSUM_DIS;
 
 			hflit[0] = htonl(cntrl);
@@ -1584,7 +1548,8 @@
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 			cntrl |= F_TXPKT_IPCSUM_DIS;
-		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
+		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
+		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 			cntrl |= F_TXPKT_L4CSUM_DIS;
 		cpl->cntrl = htonl(cntrl);
 		cpl->len = htonl(mlen | 0x80000000);
@@ -1812,34 +1777,23 @@
  *	its entirety.
  */
 static __inline void
-write_imm(struct tx_desc *d, struct mbuf *m,
+write_imm(struct tx_desc *d, caddr_t src,
 	  unsigned int len, unsigned int gen)
 {
-	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
+	struct work_request_hdr *from = (struct work_request_hdr *)src;
 	struct work_request_hdr *to = (struct work_request_hdr *)d;
 	uint32_t wr_hi, wr_lo;
 
-	if (len > WR_LEN)
-		panic("len too big %d\n", len);
-	if (len < sizeof(*from))
-		panic("len too small %d", len);
+	KASSERT(len <= WR_LEN && len >= sizeof(*from),
+	    ("%s: invalid len %d", __func__, len));
 	
 	memcpy(&to[1], &from[1], len - sizeof(*from));
 	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
-					V_WR_BCNTLFLT(len & 7));
-	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
-					V_WR_LEN((len + 7) / 8));
+	    V_WR_BCNTLFLT(len & 7));
+	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 	set_wr_hdr(to, wr_hi, wr_lo);
 	wmb();
 	wr_gen2(d, gen);
-
-	/*
-	 * This check is a hack we should really fix the logic so
-	 * that this can't happen
-	 */
-	if (m->m_type != MT_DONTFREE)
-		m_freem(m);
-	
 }
 
 /**
@@ -1907,12 +1861,6 @@
 	q->cleaned += reclaim;
 }
 
-static __inline int
-immediate(const struct mbuf *m)
-{
-	return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
-}
-
 /**
  *	ctrl_xmit - send a packet through an SGE control Tx queue
  *	@adap: the adapter
@@ -1930,11 +1878,8 @@
 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
 	
-	if (__predict_false(!immediate(m))) {
-		m_freem(m);
-		return 0;
-	}
-	
+	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
+
 	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 	wrp->wrh_lo = htonl(V_WR_TID(q->token));
 
@@ -1949,7 +1894,7 @@
 		}
 		goto again;
 	}
-	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
+	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 	
 	q->in_use++;
 	if (++q->pidx >= q->size) {
@@ -1959,7 +1904,9 @@
 	TXQ_UNLOCK(qs);
 	wmb();
 	t3_write_reg(adap, A_SG_KDOORBELL,
-		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+
+	m_free(m);
 	return (0);
 }
 
@@ -1984,7 +1931,8 @@
 	while (q->in_use < q->size &&
 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
 
-		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
+		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
+		m_free(m);
 
 		if (++q->pidx >= q->size) {
 			q->pidx = 0;
@@ -2085,7 +2033,7 @@
 		MTX_DESTROY(&q->rspq.lock);
 	}
 
-#ifdef INET
+#if defined(INET6) || defined(INET)
 	tcp_lro_free(&q->lro.ctrl);
 #endif
 
@@ -2238,6 +2186,7 @@
 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 #define NOMEM_INTR_DELAY 2500
 
+#ifdef TCP_OFFLOAD
 /**
  *	write_ofld_wr - write an offload work request
  *	@adap: the adapter
@@ -2251,71 +2200,66 @@
  *	data already carry the work request with most fields populated.
  */
 static void
-write_ofld_wr(adapter_t *adap, struct mbuf *m,
-    struct sge_txq *q, unsigned int pidx,
-    unsigned int gen, unsigned int ndesc,
-    bus_dma_segment_t *segs, unsigned int nsegs)
+write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
+    unsigned int pidx, unsigned int gen, unsigned int ndesc)
 {
 	unsigned int sgl_flits, flits;
+	int i, idx, nsegs, wrlen;
 	struct work_request_hdr *from;
-	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
+	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 	struct tx_desc *d = &q->desc[pidx];
 	struct txq_state txqs;
-	
-	if (immediate(m) && nsegs == 0) {
-		write_imm(d, m, m->m_len, gen);
+	struct sglist_seg *segs;
+	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+	struct sglist *sgl;
+
+	from = (void *)(oh + 1);	/* Start of WR within mbuf */
+	wrlen = m->m_len - sizeof(*oh);
+
+	if (!(oh->flags & F_HDR_SGL)) {
+		write_imm(d, (caddr_t)from, wrlen, gen);
+
+		/*
+		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
+		 * t3_push_frames and freed in wr_ack.  Others, like those sent
+		 * down by close_conn, t3_send_reset, etc. should be freed here.
+		 */
+		if (!(oh->flags & F_HDR_DF))
+			m_free(m);
 		return;
 	}
 
-	/* Only TX_DATA builds SGLs */
-	from = mtod(m, struct work_request_hdr *);
-	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
-
-	flits = m->m_len / 8;
-	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
-
-	make_sgl(sgp, segs, nsegs);
+	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
+
+	sgl = oh->sgl;
+	flits = wrlen / 8;
+	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
+
+	nsegs = sgl->sg_nseg;
+	segs = sgl->sg_segs;
+	for (idx = 0, i = 0; i < nsegs; i++) {
+		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
+		if (i && idx == 0) 
+			++sgp;
+		sgp->len[idx] = htobe32(segs[i].ss_len);
+		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
+		idx ^= 1;
+	}
+	if (idx) {
+		sgp->len[idx] = 0;
+		sgp->addr[idx] = 0;
+	}
+
 	sgl_flits = sgl_len(nsegs);
-
 	txqs.gen = gen;
 	txqs.pidx = pidx;
 	txqs.compl = 0;
 
-	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
+	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 	    from->wrh_hi, from->wrh_lo);
 }
 
 /**
- *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
- *	@m: the packet
- *
- * 	Returns the number of Tx descriptors needed for the given offload
- * 	packet.  These packets are already fully constructed.
- */
-static __inline unsigned int
-calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
-{
-	unsigned int flits, cnt = 0;
-	int ndescs;
-
-	if (m->m_len <= WR_LEN && nsegs == 0)
-		return (1);                 /* packet fits as immediate data */
-
-	/*
-	 * This needs to be re-visited for TOE
-	 */
-
-	cnt = nsegs;
-		
-	/* headers */
-	flits = m->m_len / 8;
-
-	ndescs = flits_to_desc(flits + sgl_len(cnt));
-
-	return (ndescs);
-}
-
-/**
  *	ofld_xmit - send a packet through an offload queue
  *	@adap: the adapter
  *	@q: the Tx offload queue
@@ -2326,28 +2270,19 @@
 static int
 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 {
-	int ret, nsegs;
+	int ret;
 	unsigned int ndesc;
 	unsigned int pidx, gen;
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
-	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
-	struct tx_sw_desc *stx;
-
-	nsegs = m_get_sgllen(m);
-	vsegs = m_get_sgl(m);
-	ndesc = calc_tx_descs_ofld(m, nsegs);
-	busdma_map_sgl(vsegs, segs, nsegs);
-
-	stx = &q->sdesc[q->pidx];
-	
+	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+
+	ndesc = G_HDR_NDESC(oh->flags);
+
 	TXQ_LOCK(qs);
 again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 	if (__predict_false(ret)) {
 		if (ret == 1) {
-			printf("no ofld desc avail\n");
-			
-			m_set_priority(m, ndesc);     /* save for restart */
 			TXQ_UNLOCK(qs);
 			return (EINTR);
 		}
@@ -2362,16 +2297,11 @@
 		q->pidx -= q->size;
 		q->gen ^= 1;
 	}
-#ifdef T3_TRACE
-	T3_TRACE5(adap->tb[q->cntxt_id & 7],
-		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
-		  ndesc, pidx, skb->len, skb->len - skb->data_len,
-		  skb_shinfo(skb)->nr_frags);
-#endif
+
+	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
+	check_ring_tx_db(adap, q, 1);
 	TXQ_UNLOCK(qs);
 
-	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
-	check_ring_tx_db(adap, q, 1);
 	return (0);
 }
 
@@ -2388,16 +2318,15 @@
 	struct sge_qset *qs = data;
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
 	adapter_t *adap = qs->port->adapter;
-	bus_dma_segment_t segs[TX_MAX_SEGS];
-	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
-	int nsegs, cleaned;
+	int cleaned;
 		
 	TXQ_LOCK(qs);
 again:	cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 
 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
 		unsigned int gen, pidx;
-		unsigned int ndesc = m_get_priority(m);
+		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+		unsigned int ndesc = G_HDR_NDESC(oh->flags);
 
 		if (__predict_false(q->size - q->in_use < ndesc)) {
 			setbit(&qs->txq_stopped, TXQ_OFLD);
@@ -2418,9 +2347,8 @@
 		}
 		
 		(void)mbufq_dequeue(&q->sendq);
-		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 		TXQ_UNLOCK(qs);
-		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
+		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 		TXQ_LOCK(qs);
 	}
 #if USE_GTS
@@ -2434,34 +2362,7 @@
 }
 
 /**
- *	queue_set - return the queue set a packet should use
- *	@m: the packet
- *
- *	Maps a packet to the SGE queue set it should use.  The desired queue
- *	set is carried in bits 1-3 in the packet's priority.
- */
-static __inline int
-queue_set(const struct mbuf *m)
-{
-	return m_get_priority(m) >> 1;
-}
-
-/**
- *	is_ctrl_pkt - return whether an offload packet is a control packet
- *	@m: the packet
- *
- *	Determines whether an offload packet should use an OFLD or a CTRL
- *	Tx queue.  This is indicated by bit 0 in the packet's priority.
- */
-static __inline int
-is_ctrl_pkt(const struct mbuf *m)
-{
-	return m_get_priority(m) & 1;
-}
-
-/**
  *	t3_offload_tx - send an offload packet
- *	@tdev: the offload device to send to
  *	@m: the packet
  *
  *	Sends an offload packet.  We use the packet priority to select the
@@ -2469,77 +2370,35 @@
  *	should be sent as regular or control, bits 1-3 select the queue set.
  */
 int
-t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
+t3_offload_tx(struct adapter *sc, struct mbuf *m)
 {
-	adapter_t *adap = tdev2adap(tdev);
-	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
-
-	if (__predict_false(is_ctrl_pkt(m))) 
-		return ctrl_xmit(adap, qs, m);
-
-	return ofld_xmit(adap, qs, m);
+	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+	struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
+
+	if (oh->flags & F_HDR_CTRL) {
+		m_adj(m, sizeof (*oh));	/* trim ofld_hdr off */
+		return (ctrl_xmit(sc, qs, m));
+	} else
+		return (ofld_xmit(sc, qs, m));
 }
-
-/**
- *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
- *	@tdev: the offload device that will be receiving the packets
- *	@q: the SGE response queue that assembled the bundle
- *	@m: the partial bundle
- *	@n: the number of packets in the bundle
- *
- *	Delivers a (partial) bundle of Rx offload packets to an offload device.
- */
-static __inline void
-deliver_partial_bundle(struct t3cdev *tdev,
-			struct sge_rspq *q,
-			struct mbuf *mbufs[], int n)
-{
-	if (n) {
-		q->offload_bundles++;
-		cxgb_ofld_recv(tdev, mbufs, n);
-	}
-}
-
-static __inline int
-rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
-    struct mbuf *m, struct mbuf *rx_gather[],
-    unsigned int gather_idx)
-{
-	
-	rq->offload_pkts++;
-	m->m_pkthdr.header = mtod(m, void *);
-	rx_gather[gather_idx++] = m;
-	if (gather_idx == RX_BUNDLE_SIZE) {
-		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
-		gather_idx = 0;
-		rq->offload_bundles++;
-	}
-	return (gather_idx);
-}
+#endif
 
 static void
 restart_tx(struct sge_qset *qs)
 {
 	struct adapter *sc = qs->port->adapter;
-	
-	
+
 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 		qs->txq[TXQ_OFLD].restarts++;
-		DPRINTF("restarting TXQ_OFLD\n");
 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 	}
-	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
-	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
-	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
-	    qs->txq[TXQ_CTRL].in_use);
-	
+
 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 		qs->txq[TXQ_CTRL].restarts++;
-		DPRINTF("restarting TXQ_CTRL\n");
 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 	}
 }
@@ -2568,6 +2427,7 @@
 
 	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 	q->port = pi;
+	q->adap = sc;
 
 	if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 	    M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
@@ -2629,8 +2489,10 @@
 		q->txq[i].gen = 1;
 		q->txq[i].size = p->txq_size[i];
 	}
-	
+
+#ifdef TCP_OFFLOAD
 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
+#endif
 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
@@ -2668,7 +2530,7 @@
 
 	/* Allocate and setup the lro_ctrl structure */
 	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
-#ifdef INET
+#if defined(INET6) || defined(INET)
 	ret = tcp_lro_init(&q->lro.ctrl);
 	if (ret) {
 		printf("error %d from tcp_lro_init\n", ret);
@@ -2735,8 +2597,7 @@
 	
 	mtx_unlock_spin(&sc->sge.reg_lock);
 	t3_update_qset_coalesce(q, p);
-	q->port = pi;
-	
+
 	refill_fl(sc, &q->fl[0], q->fl[0].size);
 	refill_fl(sc, &q->fl[1], q->fl[1].size);
 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
@@ -2761,22 +2622,12 @@
  * will also be taken into account here.
  */
 void
-t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
+t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 {
 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 	struct ifnet *ifp = pi->ifp;
 	
-	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
-
-	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
-	    cpl->csum_valid && cpl->csum == 0xffff) {
-		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
-		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
-		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
-		m->m_pkthdr.csum_data = 0xffff;
-	}
-
 	if (cpl->vlan_valid) {
 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 		m->m_flags |= M_VLANTAG;
@@ -2790,6 +2641,30 @@
 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 	m->m_len -= (sizeof(*cpl) + ethpad);
 	m->m_data += (sizeof(*cpl) + ethpad);
+
+	if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
+		struct ether_header *eh = mtod(m, void *);
+		uint16_t eh_type;
+
+		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
+			struct ether_vlan_header *evh = mtod(m, void *);
+
+			eh_type = evh->evl_proto;
+		} else
+			eh_type = eh->ether_type;
+
+		if (ifp->if_capenable & IFCAP_RXCSUM &&
+		    eh_type == htons(ETHERTYPE_IP)) {
+			m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
+			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+			m->m_pkthdr.csum_data = 0xffff;
+		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
+		    eh_type == htons(ETHERTYPE_IPV6)) {
+			m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
+			    CSUM_PSEUDO_HDR);
+			m->m_pkthdr.csum_data = 0xffff;
+		}
+	}
 }
 
 /**
@@ -2961,11 +2836,11 @@
 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
 	int budget_left = budget;
 	unsigned int sleeping = 0;
+#if defined(INET6) || defined(INET)
 	int lro_enabled = qs->lro.enabled;
 	int skip_lro;
 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
-	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
-	int ngathered = 0;
+#endif
 	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 #ifdef DEBUG	
 	static int last_holdoff = 0;
@@ -2979,10 +2854,10 @@
 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
 		int eth, eop = 0, ethpad = 0;
 		uint32_t flags = ntohl(r->flags);
-		uint32_t rss_csum = *(const uint32_t *)r;
 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
+		uint8_t opcode = r->rss_hdr.opcode;
 		
-		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
+		eth = (opcode == CPL_RX_PKT);
 		
 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 			struct mbuf *m;
@@ -3002,27 +2877,27 @@
                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
-			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
+			opcode = CPL_ASYNC_NOTIF;
 			eop = 1;
                         rspq->async_notif++;
 			goto skip;
 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
-			struct mbuf *m = NULL;
-
-			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
-			    r->rss_hdr.opcode, rspq->cidx);
-			if (mh->mh_head == NULL)
-				mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
-                        else 
-				m = m_gethdr(M_DONTWAIT, MT_DATA);
-
-			if (mh->mh_head == NULL &&  m == NULL) {	
+			struct mbuf *m = m_gethdr(M_DONTWAIT, MT_DATA);
+
+			if (m == NULL) {	
 		no_mem:
 				rspq->next_holdoff = NOMEM_INTR_DELAY;
 				budget_left--;
 				break;
 			}
-			get_imm_packet(adap, r, mh->mh_head);
+			if (mh->mh_head == NULL)
+				mh->mh_head = m;
+                        else 
+				mh->mh_tail->m_next = m;
+			mh->mh_tail = m;
+
+			get_imm_packet(adap, r, m);
+			mh->mh_head->m_pkthdr.len += m->m_len;
 			eop = 1;
 			rspq->imm_data++;
 		} else if (r->len_cq) {
@@ -3045,34 +2920,18 @@
 			handle_rsp_cntrl_info(qs, flags);
 		}
 
-		r++;
-		if (__predict_false(++rspq->cidx == rspq->size)) {
-			rspq->cidx = 0;
-			rspq->gen ^= 1;
-			r = rspq->desc;
-		}
-
-		if (++rspq->credits >= 64) {
-			refill_rspq(adap, rspq, rspq->credits);
-			rspq->credits = 0;
-		}
 		if (!eth && eop) {
-			mh->mh_head->m_pkthdr.csum_data = rss_csum;
-			/*
-			 * XXX size mismatch
-			 */
-			m_set_priority(mh->mh_head, rss_hash);
-
-			
-			ngathered = rx_offload(&adap->tdev, rspq,
-			    mh->mh_head, offload_mbufs, ngathered);
+			rspq->offload_pkts++;
+#ifdef TCP_OFFLOAD
+			adap->cpl_handler[opcode](qs, r, mh->mh_head);
+#else
+			m_freem(mh->mh_head);
+#endif
 			mh->mh_head = NULL;
-			DPRINTF("received offload packet\n");
-			
 		} else if (eth && eop) {
 			struct mbuf *m = mh->mh_head;
 
-			t3_rx_eth(adap, rspq, m, ethpad);
+			t3_rx_eth(adap, m, ethpad);
 
 			/*
 			 * The T304 sends incoming packets on any qset.  If LRO
@@ -3082,15 +2941,16 @@
 			 * The mbuf's rcvif was derived from the cpl header and
 			 * is accurate.  Skip LRO and just use that.
 			 */
+#if defined(INET6) || defined(INET)
 			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 
 			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
-#ifdef INET
 			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
-#endif
 			    ) {
 				/* successfully queue'd for LRO */
-			} else {
+			} else
+#endif
+			{
 				/*
 				 * LRO not enabled, packet unsuitable for LRO,
 				 * or unable to queue.  Pass it up right now in
@@ -3102,14 +2962,24 @@
 			mh->mh_head = NULL;
 
 		}
+
+		r++;
+		if (__predict_false(++rspq->cidx == rspq->size)) {
+			rspq->cidx = 0;
+			rspq->gen ^= 1;
+			r = rspq->desc;
+		}
+
+		if (++rspq->credits >= 64) {
+			refill_rspq(adap, rspq, rspq->credits);
+			rspq->credits = 0;
+		}
 		__refill_fl_lt(adap, &qs->fl[0], 32);
 		__refill_fl_lt(adap, &qs->fl[1], 32);
 		--budget_left;
 	}
 
-	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
-
-#ifdef INET
+#if defined(INET6) || defined(INET)
 	/* Flush LRO */
 	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/sys/mvec.h
--- a/head/sys/dev/cxgb/sys/mvec.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/sys/mvec.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/dev/cxgb/sys/mvec.h 237263 2012-06-19 07:34:13Z np $
  *
  */
 
@@ -31,15 +31,6 @@
 #define _MVEC_H_
 #include <machine/bus.h>
 
-#define	M_DDP		0x200000	/* direct data placement mbuf */
-#define	EXT_PHYS	10		/* physical/bus address  */
-
-#define m_cur_offset	m_ext.ext_size		/* override to provide ddp offset */
-#define m_seq		m_pkthdr.csum_data	/* stored sequence */
-#define m_ddp_gl	m_ext.ext_buf		/* ddp list	*/
-#define m_ddp_flags	m_pkthdr.csum_flags	/* ddp flags	*/
-#define m_ulp_mode	m_pkthdr.tso_segsz	/* upper level protocol	*/
-
 static __inline void
 busdma_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map,
     struct mbuf *m, bus_dma_segment_t *seg)
@@ -58,17 +49,6 @@
     struct mbuf **m, bus_dma_segment_t *segs, int *nsegs);
 void busdma_map_sg_vec(bus_dma_tag_t tag, bus_dmamap_t map,
     struct mbuf *m, bus_dma_segment_t *segs, int *nsegs);
-static __inline int
-busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count) 
-{
-	while (count--) {
-		segs->ds_addr = pmap_kextract((vm_offset_t)vsegs->ds_addr);
-		segs->ds_len = vsegs->ds_len;
-		segs++;
-		vsegs++;
-	}
-	return (0);
-}
 
 static __inline void
 m_freem_list(struct mbuf *m)
@@ -84,5 +64,4 @@
 	}	
 }
 
-
 #endif /* _MVEC_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/t3cdev.h
--- a/head/sys/dev/cxgb/t3cdev.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/*-
- * Copyright (c) 2007-2008, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-#ifndef _T3CDEV_H_
-#define _T3CDEV_H_
-
-#define T3CNAMSIZ 16
-
-/* Get the t3cdev associated with an ifnet */
-#define T3CDEV(ifp) (&(((struct port_info *)(ifp)->if_softc))->adapter->tdev)
-
-struct cxgb3_client;
-
-enum t3ctype {
-        T3A = 0,
-        T3B,
-	T3C
-};
-
-struct t3cdev {
-	char name[T3CNAMSIZ];		    /* T3C device name */
-	enum t3ctype type;
-	TAILQ_ENTRY(t3cdev) entry;  /* for list linking */
-        struct ifnet *lldev;     /* LL dev associated with T3C messages */
-	struct adapter *adapter;			    
-	int (*send)(struct t3cdev *dev, struct mbuf *m);
-	int (*recv)(struct t3cdev *dev, struct mbuf **m, int n);
-	int (*ctl)(struct t3cdev *dev, unsigned int req, void *data);
-	void (*arp_update)(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr, struct sockaddr *sa);
-	void *priv;                         /* driver private data */
-	void *l2opt;                        /* optional layer 2 data */
-	void *l3opt;                        /* optional layer 3 data */
-	void *l4opt;                        /* optional layer 4 data */
-	void *ulp;			    /* ulp stuff */
-};
-
-#endif /* _T3CDEV_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,14 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c 237263 2012-06-19 07:34:13Z np $");
+
+#include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -54,20 +55,14 @@
 #include <sys/proc.h>
 #include <sys/eventhandler.h>
 
-#if __FreeBSD_version < 800044
-#define V_ifnet ifnet
-#endif
+#include <netinet/in.h>
+#include <netinet/toecore.h>
 
-#include <net/if.h>
-#include <net/if_var.h>
-#if __FreeBSD_version >= 800056
-#include <net/vnet.h>
-#endif
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
-#include <netinet/in.h>
-
-#include <contrib/rdma/ib_verbs.h>
-
+#ifdef TCP_OFFLOAD
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
@@ -75,26 +70,21 @@
 #include <ulp/iw_cxgb/iw_cxgb_cm.h>
 #include <ulp/iw_cxgb/iw_cxgb.h>
 
-/*
- * XXX :-/
- * 
- */
+static int iwch_mod_load(void);
+static int iwch_mod_unload(void);
+static int iwch_activate(struct adapter *);
+static int iwch_deactivate(struct adapter *);
 
-#define idr_init(x)
-
-cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-
-static TAILQ_HEAD( ,iwch_dev) dev_list;
-static struct mtx dev_mutex;
-static eventhandler_tag event_tag;
+static struct uld_info iwch_uld_info = {
+	.uld_id = ULD_IWARP,
+	.activate = iwch_activate,
+	.deactivate = iwch_deactivate,
+};
 
 static void
 rnic_init(struct iwch_dev *rnicp)
 {
-	CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__,  rnicp);
+
 	idr_init(&rnicp->cqidr);
 	idr_init(&rnicp->qpidr);
 	idr_init(&rnicp->mmidr);
@@ -103,15 +93,16 @@
 	rnicp->attr.vendor_id = 0x168;
 	rnicp->attr.vendor_part_id = 7;
 	rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
-	rnicp->attr.max_wrs = (1UL << 24) - 1;
+	rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
 	rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
 	rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
 	rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
-	rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+	rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
 	rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
 	rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
 	rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
-	rnicp->attr.mem_pgsizes_bitmask = 0x7FFF;	/* 4KB-128MB */
+	rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
+	rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
 	rnicp->attr.can_resize_wq = 0;
 	rnicp->attr.max_rdma_reads_per_qp = 8;
 	rnicp->attr.max_rdma_read_resources =
@@ -127,170 +118,183 @@
 	rnicp->attr.zbva_support = 1;
 	rnicp->attr.local_invalidate_fence = 1;
 	rnicp->attr.cq_overflow_detection = 1;
+
 	return;
 }
 
 static void
-open_rnic_dev(struct t3cdev *tdev)
+rnic_uninit(struct iwch_dev *rnicp)
+{
+	idr_destroy(&rnicp->cqidr);
+	idr_destroy(&rnicp->qpidr);
+	idr_destroy(&rnicp->mmidr);
+	mtx_destroy(&rnicp->lock);
+}
+
+static int
+iwch_activate(struct adapter *sc)
 {
 	struct iwch_dev *rnicp;
-	static int vers_printed;
+	int rc;
 
-	CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__,  tdev);
-	if (!vers_printed++)
-		printf("Chelsio T3 RDMA Driver - version x.xx\n");
+	KASSERT(!isset(&sc->offload_map, MAX_NPORTS),
+	    ("%s: iWARP already activated on %s", __func__,
+	    device_get_nameunit(sc->dev)));
+
 	rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
-	if (!rnicp) {
-		printf("Cannot allocate ib device\n");
-		return;
-	}
-	rnicp->rdev.ulp = rnicp;
-	rnicp->rdev.t3cdev_p = tdev;
+	if (rnicp == NULL)
+		return (ENOMEM);
 
-	mtx_lock(&dev_mutex);
+	sc->iwarp_softc = rnicp;
+	rnicp->rdev.adap = sc;
 
-	if (cxio_rdev_open(&rnicp->rdev)) {
-		mtx_unlock(&dev_mutex);
+	cxio_hal_init(sc);
+	iwch_cm_init_cpl(sc);
+
+	rc = cxio_rdev_open(&rnicp->rdev);
+	if (rc != 0) {
 		printf("Unable to open CXIO rdev\n");
-		ib_dealloc_device(&rnicp->ibdev);
-		return;
+		goto err1;
 	}
 
 	rnic_init(rnicp);
 
-	TAILQ_INSERT_TAIL(&dev_list, rnicp, entry);
-	mtx_unlock(&dev_mutex);
+	rc = iwch_register_device(rnicp);
+	if (rc != 0) {
+		printf("Unable to register device\n");
+		goto err2;
+	}
 
-	if (iwch_register_device(rnicp)) {
-		printf("Unable to register device\n");
-		close_rnic_dev(tdev);
-	}
-#ifdef notyet	
-	printf("Initialized device %s\n",
-	       pci_name(rnicp->rdev.rnic_info.pdev));
-#endif	
-	return;
+	return (0);
+
+err2:
+	rnic_uninit(rnicp);
+	cxio_rdev_close(&rnicp->rdev);
+err1:
+	cxio_hal_uninit(sc);
+	iwch_cm_term_cpl(sc);
+	sc->iwarp_softc = NULL;
+
+	return (rc);
+}
+
+static int
+iwch_deactivate(struct adapter *sc)
+{
+	struct iwch_dev *rnicp;
+
+	rnicp = sc->iwarp_softc;
+
+	iwch_unregister_device(rnicp);
+	rnic_uninit(rnicp);
+	cxio_rdev_close(&rnicp->rdev);
+	cxio_hal_uninit(sc);
+	iwch_cm_term_cpl(sc);
+	ib_dealloc_device(&rnicp->ibdev);
+
+	sc->iwarp_softc = NULL;
+
+	return (0);
 }
 
 static void
-close_rnic_dev(struct t3cdev *tdev)
+iwch_activate_all(struct adapter *sc, void *arg __unused)
 {
-	struct iwch_dev *dev, *tmp;
-	CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__,  tdev);
-	mtx_lock(&dev_mutex);
-
-	TAILQ_FOREACH_SAFE(dev, &dev_list, entry, tmp) {
-		if (dev->rdev.t3cdev_p == tdev) {
-#ifdef notyet			
-			list_del(&dev->entry);
-			iwch_unregister_device(dev);
-			cxio_rdev_close(&dev->rdev);
-			idr_destroy(&dev->cqidr);
-			idr_destroy(&dev->qpidr);
-			idr_destroy(&dev->mmidr);
-			ib_dealloc_device(&dev->ibdev);
-#endif			
-			break;
-		}
-	}
-	mtx_unlock(&dev_mutex);
-}
-
-static ifaddr_event_handler_t
-ifaddr_event_handler(void *arg, struct ifnet *ifp)
-{
-	printf("%s if name %s \n", __FUNCTION__, ifp->if_xname);
-	if (ifp->if_capabilities & IFCAP_TOE4) {
-		KASSERT(T3CDEV(ifp) != NULL, ("null t3cdev ptr!"));
-		if (cxio_hal_find_rdev_by_t3cdev(T3CDEV(ifp)) == NULL)
-			open_rnic_dev(T3CDEV(ifp));
-	}
-	return 0;
-}
-
-
-static int
-iwch_init_module(void)
-{
-	VNET_ITERATOR_DECL(vnet_iter);
-	int err;
-	struct ifnet *ifp;
-
-	printf("%s enter\n", __FUNCTION__);
-	TAILQ_INIT(&dev_list);
-	mtx_init(&dev_mutex, "iwch dev_list lock", NULL, MTX_DEF);
-	
-	err = cxio_hal_init();
-	if (err)
-		return err;
-	err = iwch_cm_init();
-	if (err)
-		return err;
-	cxio_register_ev_cb(iwch_ev_dispatch);
-
-	/* Register for ifaddr events to dynamically add TOE devs */
-	event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_event_handler,
-			NULL, EVENTHANDLER_PRI_ANY);
-
-	/* Register existing TOE interfaces by walking the ifnet chain */
-	IFNET_RLOCK();
-	VNET_LIST_RLOCK();
-	VNET_FOREACH(vnet_iter) {
-		CURVNET_SET(vnet_iter); /* XXX CURVNET_SET_QUIET() ? */
-		TAILQ_FOREACH(ifp, &V_ifnet, if_link)
-			(void)ifaddr_event_handler(NULL, ifp);
-		CURVNET_RESTORE();
-	}
-	VNET_LIST_RUNLOCK();
-	IFNET_RUNLOCK();
-	return 0;
+	ADAPTER_LOCK(sc);
+	if ((sc->open_device_map & sc->offload_map) != 0 &&
+	    t3_activate_uld(sc, ULD_IWARP) == 0)
+		setbit(&sc->offload_map, MAX_NPORTS);
+	ADAPTER_UNLOCK(sc);
 }
 
 static void
-iwch_exit_module(void)
+iwch_deactivate_all(struct adapter *sc, void *arg __unused)
 {
-	EVENTHANDLER_DEREGISTER(ifaddr_event, event_tag);
-	cxio_unregister_ev_cb(iwch_ev_dispatch);
-	iwch_cm_term();
-	cxio_hal_exit();
+	ADAPTER_LOCK(sc);
+	if (isset(&sc->offload_map, MAX_NPORTS) &&
+	    t3_deactivate_uld(sc, ULD_IWARP) == 0)
+		clrbit(&sc->offload_map, MAX_NPORTS);
+	ADAPTER_UNLOCK(sc);
 }
 
-static int 
-iwch_load(module_t mod, int cmd, void *arg)
+static int
+iwch_mod_load(void)
 {
-        int err = 0;
+	int rc;
 
-        switch (cmd) {
-        case MOD_LOAD:
-                printf("Loading iw_cxgb.\n");
+	rc = iwch_cm_init();
+	if (rc != 0)
+		return (rc);
 
-                iwch_init_module();
-                break;
-        case MOD_QUIESCE:
-                break;
-        case MOD_UNLOAD:
-                printf("Unloading iw_cxgb.\n");
-		iwch_exit_module();
-                break;
-        case MOD_SHUTDOWN:
-                break;
-        default:
-                err = EOPNOTSUPP;
-                break;
-        }
+	rc = t3_register_uld(&iwch_uld_info);
+	if (rc != 0) {
+		iwch_cm_term();
+		return (rc);
+	}
 
-        return (err);
+	t3_iterate(iwch_activate_all, NULL);
+
+	return (rc);
 }
 
-static moduledata_t mod_data = {
+static int
+iwch_mod_unload(void)
+{
+	t3_iterate(iwch_deactivate_all, NULL);
+
+	iwch_cm_term();
+
+	if (t3_unregister_uld(&iwch_uld_info) == EBUSY)
+		return (EBUSY);
+
+	return (0);
+}
+#endif	/* TCP_OFFLOAD */
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+
+static int
+iwch_modevent(module_t mod, int cmd, void *arg)
+{
+	int rc = 0;
+
+#ifdef TCP_OFFLOAD
+	switch (cmd) {
+	case MOD_LOAD:
+		rc = iwch_mod_load();
+		if(rc)
+			printf("iw_cxgb: Chelsio T3 RDMA Driver failed to load\n");
+		else
+			printf("iw_cxgb: Chelsio T3 RDMA Driver loaded\n");
+		break;
+
+	case MOD_UNLOAD:
+		rc = iwch_mod_unload();
+		if(rc)
+			printf("iw_cxgb: Chelsio T3 RDMA Driver failed to unload\n");
+		else
+			printf("iw_cxgb: Chelsio T3 RDMA Driver unloaded\n");
+		break;
+
+	default:
+		rc = EINVAL;
+	}
+#else
+	printf("iw_cxgb: compiled without TCP_OFFLOAD support.\n");
+	rc = EOPNOTSUPP;
+#endif
+	return (rc);
+}
+
+static moduledata_t iwch_mod_data = {
 	"iw_cxgb",
-	iwch_load,
+	iwch_modevent,
 	0
 };
 
 MODULE_VERSION(iw_cxgb, 1);
-DECLARE_MODULE(iw_cxgb, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
-MODULE_DEPEND(iw_cxgb, rdma_core, 1, 1, 1);
-MODULE_DEPEND(iw_cxgb, if_cxgb, 1, 1, 1);
+DECLARE_MODULE(iw_cxgb, iwch_mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+MODULE_DEPEND(t3_tom, cxgbc, 1, 1, 1);
+MODULE_DEPEND(iw_cxgb, toecore, 1, 1, 1);
 MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1);
-
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 
@@ -37,6 +37,13 @@
 struct iwch_qp;
 struct iwch_mr;
 
+enum t3ctype {
+        T3A = 0,
+        T3B,
+        T3C
+};
+
+#define PAGE_MASK_IWARP (~(PAGE_SIZE-1))
 
 struct iwch_rnic_attributes {
 	u32 vendor_id;
@@ -57,6 +64,7 @@
 	 * size (4k)^i.  Phys block list mode unsupported.
 	 */
 	u32 mem_pgsizes_bitmask;
+	u64 max_mr_size;
 	u8 can_resize_wq;
 
 	/*
@@ -97,9 +105,9 @@
 	struct cxio_rdev rdev;
 	u32 device_cap_flags;
 	struct iwch_rnic_attributes attr;
-	struct kvl cqidr;
-	struct kvl qpidr;
-	struct kvl mmidr;
+	struct idr cqidr;
+	struct idr qpidr;
+	struct idr mmidr;
 	struct mtx lock;
 	TAILQ_ENTRY(iwch_dev) entry;
 };
@@ -113,40 +121,43 @@
 	return container_of(ibdev, struct iwch_dev, ibdev);
 }
 
-static inline int t3b_device(const struct iwch_dev *rhp)
+static inline int t3b_device(const struct iwch_dev *rhp __unused)
 {
-	return rhp->rdev.t3cdev_p->type == T3B;
+	return (0);
 }
 
-static inline int t3a_device(const struct iwch_dev *rhp)
+static inline int t3a_device(const struct iwch_dev *rhp __unused)
 {
-	return rhp->rdev.t3cdev_p->type == T3A;
+	return (0);
 }
 
 static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
 {
-	return kvl_lookup(&rhp->cqidr, cqid);
+	return idr_find(&rhp->cqidr, cqid);
 }
 
 static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
 {
-	return kvl_lookup(&rhp->qpidr, qpid);
+	return idr_find(&rhp->qpidr, qpid);
 }
 
 static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
 {
-	return kvl_lookup(&rhp->mmidr, mmid);
+	return idr_find(&rhp->mmidr, mmid);
 }
 
-static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
+static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
 				void *handle, u32 id)
 {
 	int ret;
 	u32 newid;
 
 	do {
+		if (!idr_pre_get(idr, GFP_KERNEL)) {
+                        return -ENOMEM;
+                }
 		mtx_lock(&rhp->lock);
-		ret = kvl_alloc_above(kvlp, handle, id, &newid);
+		ret = idr_get_new_above(idr, handle, id, &newid);
 		WARN_ON(ret != 0);
 		WARN_ON(!ret && newid != id);
 		mtx_unlock(&rhp->lock);
@@ -155,14 +166,12 @@
 	return ret;
 }
 
-static inline void remove_handle(struct iwch_dev *rhp, struct kvl *kvlp, u32 id)
+static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
 {
 	mtx_lock(&rhp->lock);
-	kvl_delete(kvlp, id);
+	idr_remove(idr, id);
 	mtx_unlock(&rhp->lock);
 }
 
-extern struct cxgb_client t3c_client;
-extern cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m);
+void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -66,13 +68,17 @@
 #include <netinet/tcp.h>
 #include <netinet/tcpip.h>
 
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_defs.h>
 #include <ulp/tom/cxgb_toepcb.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
 #include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -97,46 +103,46 @@
 };
 #endif
 
-SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
+SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
 
-static int ep_timeout_secs = 10;
+static int ep_timeout_secs = 60;
 TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
-    "CM Endpoint operation timeout in seconds (default=10)");
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
+    "CM Endpoint operation timeout in seconds (default=60)");
 
 static int mpa_rev = 1;
 TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
     "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");
 
 static int markers_enabled = 0;
 TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
     "Enable MPA MARKERS (default(0)=disabled)");
 
 static int crc_enabled = 1;
 TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
     "Enable MPA CRC (default(1)=enabled)");
 
 static int rcv_win = 256 * 1024;
 TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
     "TCP receive window in bytes (default=256KB)");
 
 static int snd_win = 32 * 1024;
 TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
     "TCP send window in bytes (default=32KB)");
 
 static unsigned int nocong = 0;
 TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
-SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
+SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
     "Turn off congestion control (default=0)");
 
 static unsigned int cong_flavor = 1;
 TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
-SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
+SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
     "TCP Congestion control flavor (default=1)");
 
 static void ep_timeout(void *arg);
@@ -174,42 +180,44 @@
 stop_ep_timer(struct iwch_ep *ep)
 {
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+	if (!callout_pending(&ep->timer)) {
+		CTR3(KTR_IW_CXGB, "%s timer stopped when its not running!  ep %p state %u\n",
+                       __func__, ep, ep->com.state);
+		return;
+	}
 	callout_drain(&ep->timer);
 	put_ep(&ep->com);
 }
 
-static int set_tcpinfo(struct iwch_ep *ep)
+static int
+set_tcpinfo(struct iwch_ep *ep)
 {
-	struct tcp_info ti;
-	struct sockopt sopt;
-	int err;
+	struct socket *so = ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct toepcb *toep;
+	int rc = 0;
 
-	sopt.sopt_dir = SOPT_GET;
-	sopt.sopt_level = IPPROTO_TCP;
-	sopt.sopt_name = TCP_INFO;
-	sopt.sopt_val = (caddr_t)&ti;
-	sopt.sopt_valsize = sizeof ti;
-	sopt.sopt_td = NULL;
-	
-	err = sogetopt(ep->com.so, &sopt);
-	if (err) {
-		printf("%s can't get tcpinfo\n", __FUNCTION__);
-		return -err;
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+
+	if ((tp->t_flags & TF_TOE) == 0) {
+		rc = EINVAL;
+		printf("%s: connection NOT OFFLOADED!\n", __func__);
+		goto done;
 	}
-	if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
-		printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
-		return -EINVAL;
-	}
+	toep = tp->t_toe;
 
-	ep->snd_seq = ti.tcpi_snd_nxt;
-	ep->rcv_seq = ti.tcpi_rcv_nxt;
-	ep->emss = ti.tcpi_snd_mss - sizeof(struct tcpiphdr);
-	ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
-	if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
-		ep->emss -= 12;
+	ep->hwtid = toep->tp_tid;
+	ep->snd_seq = tp->snd_nxt;
+	ep->rcv_seq = tp->rcv_nxt;
+	ep->emss = tp->t_maxseg;
 	if (ep->emss < 128)
 		ep->emss = 128;
-	return 0;
+done:
+	INP_WUNLOCK(inp);
+	return (rc);
+
 }
 
 static enum iwch_ep_state
@@ -264,56 +272,6 @@
 	free(epc, M_DEVBUF);
 }
 
-int
-iwch_quiesce_tid(struct iwch_ep *ep)
-{
-#ifdef notyet
-	struct cpl_set_tcb_field *req;
-	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
-
-	if (m == NULL)
-		return (-ENOMEM);
-	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
-	req->reply = 0;
-	req->cpu_idx = 0;
-	req->word = htons(W_TCB_RX_QUIESCE);
-	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
-	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
-	m_set_priority(m, CPL_PRIORITY_DATA); 
-	cxgb_ofld_send(ep->com.tdev, m);
-#endif
-	return 0;
-}
-
-int
-iwch_resume_tid(struct iwch_ep *ep)
-{
-#ifdef notyet
-	struct cpl_set_tcb_field *req;
-	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
-
-	if (m == NULL)
-		return (-ENOMEM);
-	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
-	req->reply = 0;
-	req->cpu_idx = 0;
-	req->word = htons(W_TCB_RX_QUIESCE);
-	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
-	req->val = 0;
-
-	m_set_priority(m, CPL_PRIORITY_DATA);
-	cxgb_ofld_send(ep->com.tdev, m);
-#endif
-	return 0;
-}
-
 static struct rtentry *
 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
     __be16 peer_port, u8 tos)
@@ -331,13 +289,16 @@
 }
 
 static void
-close_socket(struct iwch_ep_common *epc)
+close_socket(struct iwch_ep_common *epc, int close)
 {
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
 	SOCK_LOCK(epc->so);
 	soupcall_clear(epc->so, SO_RCV);
 	SOCK_UNLOCK(epc->so);
-	soshutdown(epc->so, SHUT_WR|SHUT_RD);
+	if (close)
+		soclose(epc->so);
+	else
+		soshutdown(epc->so, SHUT_WR|SHUT_RD);
 	epc->so = NULL;
 }
 
@@ -500,7 +461,7 @@
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	state_set(&ep->com, ABORTING);
 	abort_socket(ep);
-	close_socket(&ep->com);
+	close_socket(&ep->com, 0);
 	close_complete_upcall(ep);
 	state_set(&ep->com, DEAD);
 	put_ep(&ep->com);
@@ -582,12 +543,13 @@
 	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
 	event.so = ep->com.so;
-	if (state_read(&ep->parent_ep->com) != DEAD)
+	if (state_read(&ep->parent_ep->com) != DEAD) {
+		get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
 						ep->parent_ep->com.cm_id,
 						&event);
+	}
 	put_ep(&ep->parent_ep->com);
-	ep->parent_ep = NULL;
 }
 
 static void
@@ -729,6 +691,7 @@
 	 */
 	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
 	state_set(&ep->com, FPDU_MODE);
+	ep->mpa_attr.initiator = 1;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -885,6 +848,7 @@
 	 * If we get here we have accumulated the entire mpa
 	 * start reply message including private data.
 	 */
+	ep->mpa_attr.initiator = 0;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -934,7 +898,6 @@
 		 * rejects the CR.
 		 */
 		__state_set(&ep->com, CLOSING);
-		get_ep(&ep->com);
 		break;
 	case MPA_REP_SENT:
 		__state_set(&ep->com, CLOSING);
@@ -961,7 +924,7 @@
 			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
 				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 		}
-		close_socket(&ep->com);
+		close_socket(&ep->com, 0);
 		close_complete_upcall(ep);
 		__state_set(&ep->com, DEAD);
 		release = 1;
@@ -986,11 +949,10 @@
 {
 	struct iwch_qp_attributes attrs;
 	int ret;
-	int state;
 
-	state = state_read(&ep->com);
-	CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);
-	switch (state) {
+	mtx_lock(&ep->com.lock);
+	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
+	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 		stop_ep_timer(ep);
 		break;
@@ -1009,7 +971,6 @@
 		 * the reference on it until the ULP accepts or
 		 * rejects the CR.
 		 */
-		get_ep(&ep->com);
 		break;
 	case MORIBUND:
 	case CLOSING:
@@ -1031,6 +992,7 @@
 	case ABORTING:
 		break;
 	case DEAD:
+		mtx_unlock(&ep->com.lock);
 		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, 
 			ep->com.so->so_error);
 		return;
@@ -1039,11 +1001,12 @@
 		break;
 	}
 
-	if (state != ABORTING) {
-		close_socket(&ep->com);
-		state_set(&ep->com, DEAD);
+	if (ep->com.state != ABORTING) {
+		close_socket(&ep->com, 0);
+		__state_set(&ep->com, DEAD);
 		put_ep(&ep->com);
 	}
+	mtx_unlock(&ep->com.lock);
 	return;
 }
 
@@ -1071,7 +1034,10 @@
 					     IWCH_QP_ATTR_NEXT_STATE,
 					     &attrs, 1);
 		}
-		close_socket(&ep->com);
+		if (ep->parent_ep)
+			close_socket(&ep->com, 1);
+		else
+			close_socket(&ep->com, 0);
 		close_complete_upcall(ep);
 		__state_set(&ep->com, DEAD);
 		release = 1;
@@ -1102,77 +1068,59 @@
  * terminate() handles case (1)...
  */
 static int
-terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct toepcb *toep = (struct toepcb *)ctx;
-	struct socket *so = toeptoso(toep);
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	uint32_t hash = *((uint32_t *)r + 1);
+	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct socket *so = toep->tp_inp->inp_socket;
 	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
 
-	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+	if (state_read(&ep->com) != FPDU_MODE)
+		goto done;
+
 	m_adj(m, sizeof(struct cpl_rdma_terminate));
-	CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
+
+	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
+	    __func__, tid, ep, m->m_len);
+
 	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
 	ep->com.qp->attr.terminate_msg_len = m->m_len;
 	ep->com.qp->attr.is_terminate_local = 0;
-	return CPL_RET_BUF_DONE;
+
+done:
+	m_freem(m);
+	return (0);
 }
 
 static int
-ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct toepcb *toep = (struct toepcb *)ctx;
-	struct socket *so = toeptoso(toep);
-	struct cpl_rdma_ec_status *rep = cplhdr(m);
-	struct iwch_ep *ep;
-	struct iwch_qp_attributes attrs;
-	int release = 0;
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_rdma_ec_status *rep = mtod(m, void *);
+	unsigned int tid = GET_TID(rep);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct socket *so = toep->tp_inp->inp_socket;
+	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
 
-	ep = so->so_rcv.sb_upcallarg;
-	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);
-	if (!so || !ep) {
-		panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1); 
-	}
-	mtx_lock(&ep->com.lock);
-	switch (ep->com.state) {
-	case CLOSING:
-		if (!rep->status)
-			__state_set(&ep->com, MORIBUND);
-		else
-			__state_set(&ep->com, ABORTING);
-		break;
-	case MORIBUND:
+	if (rep->status) {
+		struct iwch_qp_attributes attrs;
+
+		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
 		stop_ep_timer(ep);
-		if (!rep->status) {
-			if ((ep->com.cm_id) && (ep->com.qp)) {
-				attrs.next_state = IWCH_QP_STATE_IDLE;
-				iwch_modify_qp(ep->com.qp->rhp,
-					     ep->com.qp,
-					     IWCH_QP_ATTR_NEXT_STATE,
-					     &attrs, 1);
-			}
-			close_socket(&ep->com);
-			close_complete_upcall(ep);
-			__state_set(&ep->com, DEAD);
-			release = 1;
-		}
-		break;
-	case DEAD:
-		break;
-	default:
-		panic("unknown state: %d\n", ep->com.state);
-	}
-	mtx_unlock(&ep->com.lock);
-	if (rep->status) {
-		log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
-		       __FUNCTION__, ep->hwtid);
 		attrs.next_state = IWCH_QP_STATE_ERROR;
 		iwch_modify_qp(ep->com.qp->rhp,
-			       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
-			       &attrs, 1);
+			     ep->com.qp,
+			     IWCH_QP_ATTR_NEXT_STATE,
+			     &attrs, 1);
+		abort_connection(ep);
 	}
-	if (release)
-		put_ep(&ep->com);
-	return CPL_RET_BUF_DONE;
+
+	m_freem(m);
+	return (0);
 }
 
 static void
@@ -1181,24 +1129,29 @@
 	struct iwch_ep *ep = (struct iwch_ep *)arg;
 	struct iwch_qp_attributes attrs;
 	int err = 0;
+	int abort = 1;
 
 	mtx_lock(&ep->com.lock);
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	switch (ep->com.state) {
 	case MPA_REQ_SENT:
+		__state_set(&ep->com, ABORTING);
 		connect_reply_upcall(ep, -ETIMEDOUT);
 		break;
 	case MPA_REQ_WAIT:
+		__state_set(&ep->com, ABORTING);
 		break;
 	case CLOSING:
 	case MORIBUND:
 		if (ep->com.cm_id && ep->com.qp)
 			err = 1;
+		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		panic("unknown state: %d\n", ep->com.state);
+		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
+			__func__, ep, ep->com.state);
+		abort = 0;
 	}
-	__state_set(&ep->com, ABORTING);
 	mtx_unlock(&ep->com.lock);
 	if (err){
 		attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1206,7 +1159,8 @@
 			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
 			     &attrs, 1);
 	}
-	abort_connection(ep);
+	if (abort)
+		abort_connection(ep);
 	put_ep(&ep->com);
 }
 
@@ -1228,6 +1182,7 @@
 		err = send_mpa_reject(ep, pdata, pdata_len);
 		err = soshutdown(ep->com.so, 3);
 	}
+	put_ep(&ep->com);
 	return 0;
 }
 
@@ -1242,8 +1197,10 @@
 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
-	if (state_read(&ep->com) == DEAD)
-		return (-ECONNRESET);
+	if (state_read(&ep->com) == DEAD) {
+		err = -ECONNRESET;
+		goto err;
+	}
 
 	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
 	PANIC_IF(!qp);
@@ -1251,7 +1208,8 @@
 	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
 	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
 		abort_connection(ep);
-		return (-EINVAL);
+		err = -EINVAL;
+		goto err;
 	}
 
 	cm_id->add_ref(cm_id);
@@ -1263,11 +1221,10 @@
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);
-	get_ep(&ep->com);
 
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
-	attrs.max_ird = ep->ord;
+	attrs.max_ird = ep->ird;
 	attrs.max_ord = ep->ord;
 	attrs.llp_stream_handle = ep;
 	attrs.next_state = IWCH_QP_STATE_RTS;
@@ -1283,20 +1240,21 @@
 			     ep->com.qp, mask, &attrs, 1);
 
 	if (err) 
-		goto err;
+		goto err1;
 
 	err = send_mpa_reply(ep, conn_param->private_data,
  			     conn_param->private_data_len);
 	if (err)
-		goto err;
+		goto err1;
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
 	put_ep(&ep->com);
 	return 0;
-err:
+err1:
 	ep->com.cm_id = NULL;
 	ep->com.qp = NULL;
 	cm_id->rem_ref(cm_id);
+err:
 	put_ep(&ep->com);
 	return err;
 }
@@ -1312,15 +1270,6 @@
 	epc->so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(epc->so);
 	sopt.sopt_dir = SOPT_SET;
-	sopt.sopt_level = SOL_SOCKET;
-	sopt.sopt_name = SO_NO_DDP;
-	sopt.sopt_val = (caddr_t)&on;
-	sopt.sopt_valsize = sizeof on;
-	sopt.sopt_td = NULL;
-	err = sosetopt(epc->so, &sopt);
-	if (err) 
-		printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err);
-	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = IPPROTO_TCP;
 	sopt.sopt_name = TCP_NODELAY;
 	sopt.sopt_val = (caddr_t)&on;
@@ -1400,16 +1349,14 @@
 
 	if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
 		printf("%s - interface not TOE capable.\n", __FUNCTION__);
-		goto fail3;
+		RTFREE(rt);
+		goto fail2;
 	}
 	tdev = TOEDEV(rt->rt_ifp);
 	if (tdev == NULL) {
 		printf("%s - No toedev for interface.\n", __FUNCTION__);
-		goto fail3;
-	}
-	if (!tdev->tod_can_offload(tdev, ep->com.so)) {
-		printf("%s - interface cannot offload!.\n", __FUNCTION__);
-		goto fail3;
+		RTFREE(rt);
+		goto fail2;
 	}
 	RTFREE(rt);
 
@@ -1420,8 +1367,6 @@
 		ep->com.thread);
 	if (!err)
 		goto out;
-fail3:
-	RTFREE(ep->dst);
 fail2:
 	put_ep(&ep->com);
 out:
@@ -1458,7 +1403,7 @@
 		cm_id->provider_data = ep;
 		goto out;
 	}
-	close_socket(&ep->com);
+	close_socket(&ep->com, 0);
 fail:
 	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
@@ -1474,7 +1419,7 @@
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
 
 	state_set(&ep->com, DEAD);
-	close_socket(&ep->com);
+	close_socket(&ep->com, 0);
 	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
 	return 0;
@@ -1493,47 +1438,48 @@
 	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
 	     ep->com.so, states[ep->com.state], abrupt);
 
-	if (ep->com.state == DEAD) {
-		CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep);
-		goto out;
-	}
-
-	if (abrupt) {
-		if (ep->com.state != ABORTING) {
-			ep->com.state = ABORTING;
-			close = 1;
-		}
-		goto out;
-	}
-
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 	case MPA_REQ_SENT:
 	case MPA_REQ_RCVD:
 	case MPA_REP_SENT:
 	case FPDU_MODE:
-		start_ep_timer(ep);
-		ep->com.state = CLOSING;
 		close = 1;
+		if (abrupt)
+			ep->com.state = ABORTING;
+		else {
+			ep->com.state = CLOSING;
+			start_ep_timer(ep);
+		}
 		break;
 	case CLOSING:
-		ep->com.state = MORIBUND;
 		close = 1;
+		if (abrupt) {
+			stop_ep_timer(ep);
+			ep->com.state = ABORTING;
+		} else
+			ep->com.state = MORIBUND;
 		break;
 	case MORIBUND:
 	case ABORTING:
+	case DEAD:
+		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
+			__func__, ep, ep->com.state);
 		break;
 	default:
 		panic("unknown state: %d\n", ep->com.state);
 		break;
 	}
-out:
+
 	mtx_unlock(&ep->com.lock);
 	if (close) {
 		if (abrupt)
 			abort_connection(ep);
-		else
+		else {
+			if (!ep->parent_ep)
+				__state_set(&ep->com, MORIBUND);
 			shutdown_socket(&ep->com);
+		}
 	}
 	return 0;
 }
@@ -1587,7 +1533,7 @@
 		send_mpa_req(ep);
 	} else {
 		connect_reply_upcall(ep, -ep->com.so->so_error);
-		close_socket(&ep->com);
+		close_socket(&ep->com, 0);
 		state_set(&ep->com, DEAD);
 		put_ep(&ep->com);
 	}
@@ -1643,10 +1589,20 @@
 	}
 	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 
 		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
+	child_ep->com.tdev = parent_ep->com.tdev;
+	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
+	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
+	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
+	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
+	child_ep->com.remote_addr.sin_family = remote->sin_family;
+	child_ep->com.remote_addr.sin_port = remote->sin_port;
+	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
+	child_ep->com.remote_addr.sin_len = remote->sin_len;
 	child_ep->com.so = child_so;
 	child_ep->com.cm_id = NULL;
 	child_ep->com.thread = parent_ep->com.thread;
 	child_ep->parent_ep = parent_ep;
+
 	free(remote, M_SONAME);
 	get_ep(&parent_ep->com);
 	child_ep->parent_ep = parent_ep;
@@ -1747,17 +1703,30 @@
         }
         taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
         TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
-	t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
-	t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status);
-	return 0;
+	return (0);
 }
 
 void
 iwch_cm_term(void)
 {
-	t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
-	t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL);
+
 	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
 	taskqueue_free(iw_cxgb_taskq);
 }
 
+void
+iwch_cm_init_cpl(struct adapter *sc)
+{
+
+	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
+	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
+}
+
+void
+iwch_cm_term_cpl(struct adapter *sc)
+{
+
+	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
+	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
+}
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,14 +25,14 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 
 #ifndef _IWCH_CM_H_
 #define _IWCH_CM_H_
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
 #include <sys/refcount.h>
 #include <sys/condvar.h>
 #include <sys/proc.h>
@@ -42,21 +42,21 @@
 #define MPA_KEY_REP "MPA ID Rep Frame"
 
 #define MPA_MAX_PRIVATE_DATA	256
-#define MPA_REV		o0	/* XXX - amso1100 uses rev 0 ! */
+#define MPA_REV			0	/* XXX - amso1100 uses rev 0 ! */
 #define MPA_REJECT		0x20
 #define MPA_CRC			0x40
 #define MPA_MARKERS		0x80
 #define MPA_FLAGS_MASK		0xE0
 
 #define put_ep(ep) { \
-	CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__,  \
+	CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d", __FUNCTION__, __LINE__,  \
 	     ep, atomic_load_acq_int(&((ep)->refcount))); \
 	if (refcount_release(&((ep)->refcount)))  \
 		__free_ep(ep); \
 }
 
 #define get_ep(ep) { \
-	CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+	CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d", __FUNCTION__, __LINE__, \
 	     ep, atomic_load_acq_int(&((ep)->refcount))); \
 	refcount_acquire(&((ep)->refcount));	  \
 }
@@ -148,7 +148,7 @@
 	TAILQ_ENTRY(iwch_ep_common) entry;
 	struct iw_cm_id *cm_id;
 	struct iwch_qp *qp;
-	struct t3cdev *tdev;
+	struct toedev *tdev;
 	enum iwch_ep_state state;
 	u_int refcount;
 	struct cv waitq;
@@ -176,7 +176,6 @@
 	u32 snd_seq;
 	u32 rcv_seq;
 	struct l2t_entry *l2t;
-	struct rtentry *dst;
 	struct mbuf *mpa_mbuf;
 	struct iwch_mpa_attributes mpa_attr;
 	unsigned int mpa_pkt_len;
@@ -237,13 +236,13 @@
 int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
 int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
 void __free_ep(struct iwch_ep_common *ep);
 void iwch_rearp(struct iwch_ep *ep);
 int iwch_ep_redirect(void *ctx, struct rtentry *old, struct rtentry *new, struct l2t_entry *l2t);
 
 int iwch_cm_init(void);
 void iwch_cm_term(void);
+void iwch_cm_init_cpl(struct adapter *);
+void iwch_cm_term_cpl(struct adapter *);
 
 #endif				/* _IWCH_CM_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,13 +28,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -59,9 +61,11 @@
 #include <sys/libkern.h>
 
 #include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -261,4 +265,4 @@
 		return npolled;
 	}
 }
-
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c	Wed Jul 25 17:04:43 2012 +0300
@@ -28,13 +28,14 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c 237263 2012-06-19 07:34:13Z np $");
+
+#include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -60,11 +61,13 @@
 
 #include <netinet/in.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
-#ifdef DEBUG
+#if defined(INVARIANTS) && defined(TCP_OFFLOAD)
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
@@ -74,75 +77,100 @@
 #include <ulp/iw_cxgb/iw_cxgb_resource.h>
 #include <ulp/iw_cxgb/iw_cxgb_user.h>
 
+static int
+cxio_rdma_get_mem(struct cxio_rdev *rdev, struct ch_mem_range *m)
+{
+	struct adapter *sc = rdev->adap;
+	struct mc7 *mem;
+
+	if ((m->addr & 7) || (m->len & 7))
+		return (EINVAL);
+	if (m->mem_id == MEM_CM)
+		mem = &sc->cm;
+	else if (m->mem_id == MEM_PMRX)
+		mem = &sc->pmrx;
+	else if (m->mem_id == MEM_PMTX)
+		mem = &sc->pmtx;
+	else
+		return (EINVAL);
+
+	return (t3_mc7_bd_read(mem, m->addr/8, m->len/8, (u64 *)m->buf));
+}
+
 void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag)
 {
-	struct ch_mem_range *m;
+	struct ch_mem_range m;
 	u64 *data;
+	u32 addr;
 	int rc;
 	int size = 32;
 
-	m = kmalloc(sizeof(*m) + size, M_NOWAIT);
-	if (!m) {
+	m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (m.buf == NULL) {
 		CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
 		return;
 	}
-	m->mem_id = MEM_PMRX;
-	m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
-	m->len = size;
-	CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
-	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	m.mem_id = MEM_PMRX;
+	m.addr = (stag >> 8) * 32 + rdev->rnic_info.tpt_base;
+	m.len = size;
+	CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
+
+	rc = cxio_rdma_get_mem(rdev, &m);
 	if (rc) {
 		CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
-		free(m, M_DEVBUF);
+		free(m.buf, M_DEVBUF);
 		return;
 	}
 
-	data = (u64 *)m->buf;
+	data = (u64 *)m.buf;
+	addr = m.addr;
 	while (size > 0) {
-		CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", m->addr, (unsigned long long) *data);
+		CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", addr, (unsigned long long) *data);
 		size -= 8;
 		data++;
-		m->addr += 8;
+		addr += 8;
 	}
-	free(m, M_DEVBUF);
+	free(m.buf, M_DEVBUF);
 }
 
 void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift)
 {
-	struct ch_mem_range *m;
+	struct ch_mem_range m;
 	u64 *data;
+	u32 addr;
 	int rc;
 	int size, npages;
 
 	shift += 12;
 	npages = (len + (1ULL << shift) - 1) >> shift;
 	size = npages * sizeof(u64);
-
-	m = kmalloc(sizeof(*m) + size, M_NOWAIT);
-	if (!m) {
+	m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (m.buf == NULL) {
 		CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
 		return;
 	}
-	m->mem_id = MEM_PMRX;
-	m->addr = pbl_addr;
-	m->len = size;
+	m.mem_id = MEM_PMRX;
+	m.addr = pbl_addr;
+	m.len = size;
 	CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d",
-		__FUNCTION__, m->addr, m->len, npages);
-	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+		__FUNCTION__, m.addr, m.len, npages);
+
+	rc = cxio_rdma_get_mem(rdev, &m);
 	if (rc) {
 		CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
-		free(m, M_DEVBUF);
+		free(m.buf, M_DEVBUF);
 		return;
 	}
 
-	data = (u64 *)m->buf;
+	data = (u64 *)m.buf;
+	addr = m.addr;
 	while (size > 0) {
-		CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", m->addr, (unsigned long long) *data);
+		CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", addr, (unsigned long long) *data);
 		size -= 8;
 		data++;
-		m->addr += 8;
+		addr += 8;
 	}
-	free(m, M_DEVBUF);
+	free(m.buf, M_DEVBUF);
 }
 
 void cxio_dump_wqe(union t3_wr *wqe)
@@ -175,70 +203,76 @@
 
 void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents)
 {
-	struct ch_mem_range *m;
+	struct ch_mem_range m;
 	int size = nents * 64;
 	u64 *data;
+	u32 addr;
 	int rc;
 
-	m = kmalloc(sizeof(*m) + size, M_NOWAIT);
-	if (!m) {
+	m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (m.buf == NULL) {
 		CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
 		return;
 	}
-	m->mem_id = MEM_PMRX;
-	m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
-	m->len = size;
-	CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
-	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	m.mem_id = MEM_PMRX;
+	m.addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+	m.len = size;
+	CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
+
+	rc = cxio_rdma_get_mem(rdev, &m);
 	if (rc) {
 		CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
-		free(m, M_DEVBUF);
+		free(m.buf, M_DEVBUF);
 		return;
 	}
 
-	data = (u64 *)m->buf;
+	data = (u64 *)m.buf;
+	addr = m.addr;
 	while (size > 0) {
-		CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", m->addr, (unsigned long long) *data);
+		CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", addr, (unsigned long long) *data);
 		size -= 8;
 		data++;
-		m->addr += 8;
+		addr += 8;
 	}
-	free(m, M_DEVBUF);
+	free(m.buf, M_DEVBUF);
 }
 
 void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid)
 {
-	struct ch_mem_range *m;
+	struct ch_mem_range m;
 	int size = TCB_SIZE;
 	uint32_t *data;
+	uint32_t addr;
 	int rc;
 
-	m = kmalloc(sizeof(*m) + size, M_NOWAIT);
-	if (!m) {
+	m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (m.buf == NULL) {
 		CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
 		return;
 	}
-	m->mem_id = MEM_CM;
-	m->addr = hwtid * size;
-	m->len = size;
-	CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m->addr, m->len);
-	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	m.mem_id = MEM_CM;
+	m.addr = hwtid * size;
+	m.len = size;
+	CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m.addr, m.len);
+
+	rc = cxio_rdma_get_mem(rdev, &m);
 	if (rc) {
 		CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
-		free(m, M_DEVBUF);
+		free(m.buf, M_DEVBUF);
 		return;
 	}
 
-	data = (uint32_t *)m->buf;
+	data = (uint32_t *)m.buf;
+	addr = m.addr;
 	while (size > 0) {
 		printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
-			m->addr,
+			addr,
 			*(data+2), *(data+3), *(data),*(data+1),
 			*(data+6), *(data+7), *(data+4), *(data+5));
 		size -= 32;
 		data += 8;
-		m->addr += 32;
+		addr += 32;
 	}
-	free(m, M_DEVBUF);
+	free(m.buf, M_DEVBUF);
 }
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -59,9 +61,11 @@
 
 #include <netinet/in.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -81,11 +85,22 @@
 	struct ib_event event;
 	struct iwch_qp_attributes attrs;
 
+	mtx_lock(&rnicp->lock);
+
+	if (!qhp) {
+                CTR3(KTR_IW_CXGB, "%s unaffiliated error 0x%x qpid 0x%x\n",
+                       __func__, CQE_STATUS(rsp_msg->cqe),
+                       CQE_QPID(rsp_msg->cqe));
+                mtx_unlock(&rnicp->lock);
+                return;
+        }
+
 	if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
 	    (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
 		CTR4(KTR_IW_CXGB, "%s AE received after RTS - "
 		     "qp state %d qpid 0x%x status 0x%x", __FUNCTION__,
 		     qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+                mtx_unlock(&rnicp->lock);
 		return;
 	}
 
@@ -95,6 +110,15 @@
 	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
 	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
 
+        mtx_unlock(&rnicp->lock);
+
+	if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+                attrs.next_state = IWCH_QP_STATE_TERMINATE;
+                iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+                               &attrs, 1);
+                if (send_term)
+                        iwch_post_terminate(qhp, rsp_msg);
+        }
 
 	event.event = ib_event;
 	event.device = chp->ibcq.device;
@@ -106,25 +130,17 @@
 	if (qhp->ibqp.event_handler)
 		(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
 
-	if (qhp->attr.state == IWCH_QP_STATE_RTS) {
-		attrs.next_state = IWCH_QP_STATE_TERMINATE;
-		iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
-			       &attrs, 1);
-		if (send_term)
-			iwch_post_terminate(qhp, rsp_msg);
-	}
+	(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
 }
 
 void
-iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
+iwch_ev_dispatch(struct iwch_dev *rnicp, struct mbuf *m)
 {
-	struct iwch_dev *rnicp;
 	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
 	struct iwch_cq *chp;
 	struct iwch_qp *qhp;
 	u32 cqid = RSPQ_CQID(rsp_msg);
 
-	rnicp = (struct iwch_dev *) rdev_p->ulp;
 	mtx_lock(&rnicp->lock);
 	chp = get_chp(rnicp, cqid);
 	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -136,7 +152,7 @@
 		       CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
 		       CQE_WRID_LOW(rsp_msg->cqe));
 		mtx_unlock(&rnicp->lock);
-		goto out;
+		return;
 	}
 	iwch_qp_add_ref(&qhp->ibqp);
 	mtx_lock(&chp->lock);
@@ -200,12 +216,6 @@
 	case TPT_ERR_BOUND:
 	case TPT_ERR_INVALIDATE_SHARED_MR:
 	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
-		log(LOG_ERR, "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
-		       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
-		       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
-		       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
-		       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
 		post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
 		break;
 
@@ -248,6 +258,5 @@
 	        wakeup(chp);
 	mtx_unlock(&chp->lock);
 	iwch_qp_rem_ref(&qhp->ibqp);
-out:
-	m_free(m);
 }
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,4 +1,3 @@
-
 /**************************************************************************
 
 Copyright (c) 2007, Chelsio Inc.
@@ -28,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -47,6 +48,8 @@
 #include <sys/linker.h>
 #include <sys/firmware.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockopt.h>
 #include <sys/sockio.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -59,12 +62,25 @@
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
+#include <net/route.h>
+#include <netinet/in_systm.h>
 #include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/toecore.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
 
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/tom/cxgb_l2t.h>
+#include <ulp/tom/cxgb_tom.h>
+#include <ulp/tom/cxgb_toepcb.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
 #include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -72,29 +88,21 @@
 #include <ulp/iw_cxgb/iw_cxgb.h>
 #include <ulp/iw_cxgb/iw_cxgb_resource.h>
 
-static TAILQ_HEAD( ,cxio_rdev) rdev_list;
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+/* Response queue used for RDMA events. */
+#define ASYNC_NOTIF_RSPQ 0
+static inline int
+cxio_rdma_cq_setup(struct cxio_rdev *rdev_p, unsigned id, uint64_t base_addr,
+    unsigned size, unsigned ovfl_mode, unsigned credits, unsigned credit_thres)
+{
+	struct adapter *sc = rdev_p->adap;
+	int rc;
 
-static struct cxio_rdev *
-cxio_hal_find_rdev_by_name(char *dev_name)
-{
-	struct cxio_rdev *rdev;
+	mtx_lock_spin(&sc->sge.reg_lock);
+	rc = -t3_sge_init_cqcntxt(sc, id, base_addr, size, ASYNC_NOTIF_RSPQ,
+	    ovfl_mode, credits, credit_thres);
+	mtx_unlock_spin(&sc->sge.reg_lock);
 
-	TAILQ_FOREACH(rdev, &rdev_list, entry)
-		if (!strcmp(rdev->dev_name, dev_name))
-			return rdev;
-	return NULL;
-}
-
-struct cxio_rdev *
-cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
-	struct cxio_rdev *rdev;
-
-	TAILQ_FOREACH(rdev, &rdev_list, entry)
-		if (rdev->t3cdev_p == tdev)
-			return rdev;
-	return NULL;
+	return (rc);
 }
 
 int
@@ -104,12 +112,14 @@
 	int ret;
 	struct t3_cqe *cqe;
 	u32 rptr;
+	struct adapter *sc = rdev_p->adap;
 
-	struct rdma_cq_op setup;
-	setup.id = cq->cqid;
-	setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
-	setup.op = op;
-	ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+	if (op != CQ_CREDIT_UPDATE)
+		credit = 0;
+
+	mtx_lock_spin(&sc->sge.reg_lock);
+	ret = t3_sge_cqcntxt_op(sc, cq->cqid, op, credit);
+	mtx_unlock_spin(&sc->sge.reg_lock);
 
 	if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
 		return (ret);
@@ -140,30 +150,26 @@
 		while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
 			DELAY(1);
 			if (i++ > 1000000) {
+				struct adapter *sc = rdev_p->adap;
+
+				log(LOG_ERR, "%s: stalled rnic\n",
+				    device_get_nameunit(sc->dev));
 				PANIC_IF(1);
-				log(LOG_ERR, "%s: stalled rnic\n",
-				       rdev_p->dev_name);
 				return (-EIO);
 			}
 		}
 
-		return 1;
+		return (1);
 	}
 
-	return 0;
+	return (0);
 }
 
 static int
 cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
 {
-	struct rdma_cq_setup setup;
-	setup.id = cqid;
-	setup.base_addr = 0;	/* NULL address */
-	setup.size = 0;		/* disaable the CQ */
-	setup.credits = 0;
-	setup.credit_thres = 0;
-	setup.ovfl_mode = 0;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+
+	return (cxio_rdma_cq_setup(rdev_p, cqid, 0, 0, 0, 0, 0));
 }
 
 static int
@@ -171,43 +177,38 @@
 {
 	u64 sge_cmd;
 	struct t3_modify_qp_wr *wqe;
-	struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+	struct mbuf *m;
+       
+	m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
 	if (m == NULL) {
 		CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
 		return (-ENOMEM);
 	}
 	wqe = mtod(m, struct t3_modify_qp_wr *);
-	m->m_len = m->m_pkthdr.len = sizeof(*wqe);
 	memset(wqe, 0, sizeof(*wqe));
 	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
 	wqe->flags = htobe32(MODQP_WRITE_EC);
 	sge_cmd = qpid << 8 | 3;
 	wqe->sge_cmd = htobe64(sge_cmd);
-	m_set_priority(m, CPL_PRIORITY_CONTROL);
-	m_set_sgl(m, NULL);
-	m_set_sgllen(m, 0);
-	return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+	return t3_offload_tx(rdev_p->adap, m);
 }
 
 int
-cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
 {
-	struct rdma_cq_setup setup;
 	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
 	
+	size += 1; /* one extra page for storing cq-in-err state */
 	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
 	if (!cq->cqid)
 		return (-ENOMEM);
-	cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
-	if (!cq->sw_queue)
-		return (-ENOMEM);
-#if 0	
-	cq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
-					     (1UL << (cq->size_log2)) *
-					     sizeof(struct t3_cqe),
-					     &(cq->dma_addr), M_NOWAIT);
-#else
-	cq->queue = contigmalloc((1UL << (cq->size_log2))*sizeof(struct t3_cqe),
+	if (kernel) {
+		cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
+		if (!cq->sw_queue)
+			return (-ENOMEM);
+	}
+
+	cq->queue = contigmalloc(size,
 	    M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
 	if (cq->queue)
 		cq->dma_addr = vtophys(cq->queue);
@@ -215,35 +216,10 @@
 		free(cq->sw_queue, M_DEVBUF);
 		return (-ENOMEM);
 	}
-#endif
-	
-#ifdef notyet	
-	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
-#endif
 	memset(cq->queue, 0, size);
-	setup.id = cq->cqid;
-	setup.base_addr = (u64) (cq->dma_addr);
-	setup.size = 1UL << cq->size_log2;
-	setup.credits = 65535;
-	setup.credit_thres = 1;
-	if (rdev_p->t3cdev_p->type != T3A)
-		setup.ovfl_mode = 0;
-	else
-		setup.ovfl_mode = 1;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
 
-int
-cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
-	struct rdma_cq_setup setup;
-	setup.id = cq->cqid;
-	setup.base_addr = (u64) (cq->dma_addr);
-	setup.size = 1UL << cq->size_log2;
-	setup.credits = setup.size;
-	setup.credit_thres = setup.size;	/* TBD: overflow recovery */
-	setup.ovfl_mode = 1;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+	return (cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr,
+	    1UL << cq->size_log2, 0, 65535, 1));
 }
 
 static u32
@@ -325,7 +301,7 @@
 	if (!wq->qpid)
 		return (-ENOMEM);
 
-	wq->rq = malloc(depth * sizeof(u64), M_DEVBUF, M_NOWAIT|M_ZERO);
+	wq->rq = malloc(depth * sizeof(struct t3_swrq), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (!wq->rq)
 		goto err1;
 
@@ -336,28 +312,19 @@
 	wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (!wq->sq)
 		goto err3;
-#if 0
-	wq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
-					     depth * sizeof(union t3_wr),
-					     &(wq->dma_addr), M_NOWAIT);
-#else
 	wq->queue = contigmalloc(depth *sizeof(union t3_wr),
 	    M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
 	if (wq->queue)
 		wq->dma_addr = vtophys(wq->queue);
-
-#endif
-	if (!wq->queue)
+	else
 		goto err4;
 
 	memset(wq->queue, 0, depth * sizeof(union t3_wr));
-#ifdef notyet	
-	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
-#endif
 	wq->doorbell = rdev_p->rnic_info.kdb_addr;
 	if (!kernel_domain)
 		wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
 					(wq->qpid << rdev_p->qpshift);
+	wq->rdev = rdev_p;
 	CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__,
 	     wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
 	return 0;
@@ -431,10 +398,11 @@
 	cq->sw_wptr++;
 }
 
-void
+int
 cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
 {
 	u32 ptr;
+	int flushed = 0;
 
 	CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq);
 
@@ -442,8 +410,11 @@
 	CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__,
 	    wq->rq_rptr, wq->rq_wptr, count);
 	ptr = wq->rq_rptr + count;
-	while (ptr++ != wq->rq_wptr)
+	while (ptr++ != wq->rq_wptr) {
 		insert_recv_cqe(wq, cq);
+		flushed++;
+	}
+       	return flushed;
 }
 
 static void
@@ -468,19 +439,22 @@
 	cq->sw_wptr++;
 }
 
-void
+int
 cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
 {
 	__u32 ptr;
+	int flushed = 0;
 	struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
 
 	ptr = wq->sq_rptr + count;
-	sqp += count;
+	sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
 	while (ptr != wq->sq_wptr) {
 		insert_sq_cqe(wq, cq, sqp);
-		sqp++;
 		ptr++;
+		sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+		flushed++;
 	}
+	return flushed;
 }
 
 /*
@@ -516,7 +490,7 @@
 	if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
 		return 0;
 
-	if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+	if (CQE_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
 	    Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
 		return 0;
 
@@ -563,16 +537,8 @@
 static int
 cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
 {
-	struct rdma_cq_setup setup;
-	setup.id = 0;
-	setup.base_addr = 0;	/* NULL address */
-	setup.size = 1;		/* enable the CQ */
-	setup.credits = 0;
 
-	/* force SGE to redirect to RspQ and interrupt */
-	setup.credit_thres = 0;
-	setup.ovfl_mode = 1;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+	return (cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0));
 }
 
 static int
@@ -584,41 +550,28 @@
 	struct t3_modify_qp_wr *wqe;
 	struct mbuf *m;
 
-	m = m_gethdr(MT_DATA, M_NOWAIT);
+	m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
 	if (m == NULL) {
 		CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
-		return (-ENOMEM);
+		return (ENOMEM);
 	}
 	err = cxio_hal_init_ctrl_cq(rdev_p);
 	if (err) {
 		CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err);
 		goto err;
 	}
-#if 0	
-	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
-		rdev_p->rnic_info.pdev,
-		    (1 << T3_CTRL_QP_SIZE_LOG2) *
-		    sizeof(union t3_wr),
-		    &(rdev_p->ctrl_qp.dma_addr),
-		    M_NOWAIT);
-#else
+
 	rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2) 
 	    *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
 	if (rdev_p->ctrl_qp.workq)
 		rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq);
-
-#endif	
-	
-	if (!rdev_p->ctrl_qp.workq) {
+	else {
 		CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__);
-		err = -ENOMEM;
+		err = ENOMEM;
 		goto err;
 	}
-#if 0	
-	pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
-			   rdev_p->ctrl_qp.dma_addr);
-#endif	
-	rdev_p->ctrl_qp.doorbell = (void /*__iomem */ *)rdev_p->rnic_info.kdb_addr;
+
+	rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr;
 	memset(rdev_p->ctrl_qp.workq, 0,
 	       (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
 
@@ -637,10 +590,8 @@
 	ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
 			V_EC_TYPE(0) | V_EC_GEN(1) |
 			V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
-	wqe = mtod(m, struct t3_modify_qp_wr *);
-	m->m_len = m->m_pkthdr.len = sizeof(*wqe);
 	memset(wqe, 0, sizeof(*wqe));
-	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0,
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
 		       T3_CTL_QP_TID, 7);
 	wqe->flags = htobe32(MODQP_WRITE_EC);
 	sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -650,12 +601,9 @@
 	CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d",
 	     (unsigned long long) rdev_p->ctrl_qp.dma_addr,
 	     rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
-	m_set_priority(m, CPL_PRIORITY_CONTROL);
-	m_set_sgl(m, NULL);
-	m_set_sgllen(m, 0);
-	return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+	return t3_offload_tx(rdev_p->adap, m);
 err:
-	m_free(m);
+	m_freem(m);
 	return err;
 }
 
@@ -681,7 +629,7 @@
  */
 static int
 cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
-				      u32 len, void *data, int completion)
+				      u32 len, void *data)
 {
 	u32 i, nr_wqe, copy_len;
 	u8 *copy_data;
@@ -718,7 +666,7 @@
 		flag = 0;
 		if (i == (nr_wqe - 1)) {
 			/* last WQE */
-			flag = completion ? T3_COMPLETION_FLAG : 0;
+			flag = T3_COMPLETION_FLAG;
 			if (len % 32)
 				utx_len = len / 32 + 1;
 			else
@@ -786,14 +734,13 @@
 __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
 			 u32 *stag, u8 stag_state, u32 pdid,
 			 enum tpt_mem_type type, enum tpt_mem_perm perm,
-			 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
-			 u32 *pbl_size, u32 *pbl_addr)
+			 u32 zbva, u64 to, u32 len, u8 page_size,
+			 u32 pbl_size, u32 pbl_addr)
 {
 	int err;
 	struct tpt_entry tpt;
 	u32 stag_idx;
 	u32 wptr;
-	int rereg = (*stag != T3_STAG_UNSET);
 
 	stag_state = stag_state > 0;
 	stag_idx = (*stag) >> 8;
@@ -807,30 +754,8 @@
 	CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
 	     __FUNCTION__, stag_state, type, pdid, stag_idx);
 
-	if (reset_tpt_entry)
-		cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
-	else if (!rereg) {
-		*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
-		if (!*pbl_addr) {
-			return (-ENOMEM);
-		}
-	}
-
 	mtx_lock(&rdev_p->ctrl_qp.lock);
 
-	/* write PBL first if any - update pbl only if pbl list exist */
-	if (pbl) {
-
-		CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
-		     __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
-		     *pbl_size);
-		err = cxio_hal_ctrl_qp_write_mem(rdev_p,
-				(*pbl_addr >> 5),
-				(*pbl_size << 3), pbl, 0);
-		if (err)
-			goto ret;
-	}
-
 	/* write TPT entry */
 	if (reset_tpt_entry)
 		memset(&tpt, 0, sizeof(tpt));
@@ -845,23 +770,23 @@
 				V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
 				V_TPT_PAGE_SIZE(page_size));
 		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
-				    htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+				    htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
 		tpt.len = htobe32(len);
 		tpt.va_hi = htobe32((u32) (to >> 32));
 		tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL));
 		tpt.rsvd_bind_cnt_or_pstag = 0;
 		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
-				  htobe32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+				  htobe32(V_TPT_PBL_SIZE((pbl_size) >> 2));
 	}
 	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
 				       stag_idx +
 				       (rdev_p->rnic_info.tpt_base >> 5),
-				       sizeof(tpt), &tpt, 1);
+				       sizeof(tpt), &tpt);
 
 	/* release the stag index to free pool */
 	if (reset_tpt_entry)
 		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-ret:
+
 	wptr = rdev_p->ctrl_qp.wptr;
 	mtx_unlock(&rdev_p->ctrl_qp.lock);
 	if (!err)
@@ -872,61 +797,90 @@
 	return err;
 }
 
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+			u32 pbl_addr, u32 pbl_size)
+{
+	u32 wptr;
+	int err;
+
+	CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
+		__func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+		pbl_size);
+
+	mtx_lock(&rdev_p->ctrl_qp.lock);
+	err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+					pbl);
+	wptr = rdev_p->ctrl_qp.wptr;
+	mtx_unlock(&rdev_p->ctrl_qp.lock);
+	if (err)
+		return err;
+
+	if (cxio_wait(&rdev_p->ctrl_qp,
+                        &rdev_p->ctrl_qp.lock,
+                        SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr)))
+		return ERESTART;
+
+	return 0;
+}
+
 int
 cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr)
+			   u8 page_size, u32 pbl_size, u32 pbl_addr)
 {
 	*stag = T3_STAG_UNSET;
 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+			     zbva, to, len, page_size, pbl_size, pbl_addr);
 }
 
 int
 cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr)
+			   u8 page_size, u32 pbl_size, u32 pbl_addr)	
 {
 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+			     zbva, to, len, page_size, pbl_size, pbl_addr);
 }
 
 int
 cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
 		   u32 pbl_addr)
 {
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
-			     &pbl_size, &pbl_addr);
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+			     pbl_size, pbl_addr);
 }
 
 int
 cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
 {
-	u32 pbl_size = 0;
 	*stag = T3_STAG_UNSET;
 	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
-			     NULL, &pbl_size, NULL);
+			     0, 0);
 }
 
 int
 cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
 {
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
-			     NULL, NULL);
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+			     0, 0);
 }
 
 int
-cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr,
+    struct socket *so)
 {
 	struct t3_rdma_init_wr *wqe;
-	struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+	struct mbuf *m;
+	struct ofld_hdr *oh;
+	int rc;
+	struct tcpcb *tp;
+	struct inpcb *inp;
+	struct toepcb *toep;
+
+	m = M_GETHDR_OFLD(0, CPL_PRIORITY_DATA, wqe);
 	if (m == NULL)
 		return (-ENOMEM);
 	CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p);
-	wqe = mtod(m, struct t3_rdma_init_wr *);
-	m->m_len = m->m_pkthdr.len = sizeof(*wqe);
 	wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT));
 	wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) |
 					   V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
@@ -940,36 +894,41 @@
 	wqe->mpaattrs = attr->mpaattrs;
 	wqe->qpcaps = attr->qpcaps;
 	wqe->ulpdu_size = htobe16(attr->tcp_emss);
-	wqe->flags = htobe32(attr->flags);
+	wqe->rqe_count = htobe16(attr->rqe_count);
+	wqe->flags_rtr_type = htobe16(attr->flags |
+					V_RTR_TYPE(attr->rtr_type) |
+					V_CHAN(attr->chan));	
 	wqe->ord = htobe32(attr->ord);
 	wqe->ird = htobe32(attr->ird);
 	wqe->qp_dma_addr = htobe64(attr->qp_dma_addr);
 	wqe->qp_dma_size = htobe32(attr->qp_dma_size);
 	wqe->irs = htobe32(attr->irs);
-	m_set_priority(m, 0);	/* 0=>ToeQ; 1=>CtrlQ */
-	m_set_sgl(m, NULL);
-	m_set_sgllen(m, 0);
-	return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
-}
 
-void
-cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
-	cxio_ev_cb = ev_cb;
-}
+	/* XXX: bad form, fix later */
+	inp = sotoinpcb(so);
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+	toep = tp->t_toe;
+	oh = mtod(m, struct ofld_hdr *);
+	oh->plen = 0;
+	oh->flags |= F_HDR_DF;
+	enqueue_wr(toep, m);
+	toep->tp_wr_avail--;
+	toep->tp_wr_unacked++;
+	rc = t3_offload_tx(rdev_p->adap, m);
+	INP_WUNLOCK(inp);
 
-void
-cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
-	cxio_ev_cb = NULL;
+	return (rc);
 }
 
 static int
-cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
+cxio_hal_ev_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	static int cnt;
-	struct cxio_rdev *rdev_p = NULL;
+	struct adapter *sc = qs->adap;
+	struct iwch_dev *rnicp = sc->iwarp_softc;
+	struct cxio_rdev *rdev_p = &rnicp->rdev;
 	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
+	int qpid = CQE_QPID(rsp_msg->cqe);
 	
 	CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x",
 	     __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
@@ -978,80 +937,50 @@
 	     RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
 	     RSPQ_CREDIT_THRESH(rsp_msg));
 	CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d",
-	     CQE_QPID(rsp_msg->cqe), 
-	     CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
-	     CQE_OPCODE(rsp_msg->cqe));
+	    qpid, CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+	    CQE_OPCODE(rsp_msg->cqe));
 	CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
 	     CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-	rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
-	if (!rdev_p) {
-		CTR2(KTR_IW_CXGB, "%s called by t3cdev %p with null ulp", __FUNCTION__,
-		     t3cdev_p);
-		return 0;
-	}
-	if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+
+	switch(qpid) {
+	case T3_CTRL_QP_ID:
 		mtx_lock(&rdev_p->ctrl_qp.lock);
 		rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
 		wakeup(&rdev_p->ctrl_qp);
 		mtx_unlock(&rdev_p->ctrl_qp.lock);
-		m_free(m);
-	} else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
-		m_free(m);
-	else if (cxio_ev_cb)
-		(*cxio_ev_cb) (rdev_p, m);
-	else
-		m_free(m);
-	cnt++;
-	return 0;
+		break;
+	case 0xfff8:
+		break;
+	default:
+		iwch_ev_dispatch(rnicp, m);
+	}
+
+	m_freem(m);
+	return (0);
 }
 
 /* Caller takes care of locking if needed */
 int
 cxio_rdev_open(struct cxio_rdev *rdev_p)
 {
-	struct ifnet *ifp;
 	int err = 0;
+	struct rdma_info *ri = &rdev_p->rnic_info;
+	struct adapter *sc = rdev_p->adap;
 
-	if (strlen(rdev_p->dev_name)) {
-		if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
-			return (-EBUSY);
-		}
-		ifp = rdev_p->ifp; 
-		if (ifp == NULL) 
-			return (-EINVAL);
-		if_free(ifp);
-	} else if (rdev_p->t3cdev_p) {
-		if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) 
-			return (-EBUSY);
-		ifp = rdev_p->t3cdev_p->lldev;
-		strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
-			T3_MAX_DEV_NAME_LEN);
-	} else {
-		CTR1(KTR_IW_CXGB, "%s t3cdev_p or dev_name must be set", __FUNCTION__);
-		return (-EINVAL);
-	}
+	KASSERT(rdev_p->adap, ("%s: adap is NULL", __func__));
 
-	TAILQ_INSERT_TAIL(&rdev_list, rdev_p, entry);
+	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
 
-	CTR2(KTR_IW_CXGB, "%s opening rnic dev %s", __FUNCTION__, rdev_p->dev_name);
-	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
-	if (!rdev_p->t3cdev_p)
-		rdev_p->t3cdev_p = T3CDEV(ifp);
-	rdev_p->t3cdev_p->ulp = (void *) rdev_p;
-	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
-					 &(rdev_p->rnic_info));
-	if (err) {
-		log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
-		     __FUNCTION__, rdev_p->t3cdev_p, err);
-		goto err1;
-	}
-	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
-				    &(rdev_p->port_info));
-	if (err) {
-		log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
-		     __FUNCTION__, rdev_p->t3cdev_p, err);
-		goto err1;
-	}
+	ri->udbell_physbase = rman_get_start(sc->udbs_res);
+	ri->udbell_len = rman_get_size(sc->udbs_res);
+	ri->tpt_base = t3_read_reg(sc, A_ULPTX_TPT_LLIMIT);
+	ri->tpt_top  = t3_read_reg(sc, A_ULPTX_TPT_ULIMIT);
+	ri->pbl_base = t3_read_reg(sc, A_ULPTX_PBL_LLIMIT);
+	ri->pbl_top  = t3_read_reg(sc, A_ULPTX_PBL_ULIMIT);
+	ri->rqt_base = t3_read_reg(sc, A_ULPRX_RQ_LLIMIT);
+	ri->rqt_top  = t3_read_reg(sc, A_ULPRX_RQ_ULIMIT);
+	ri->kdb_addr =  (void *)((unsigned long)
+	    rman_get_virtual(sc->regs_res) + A_SG_KDOORBELL);
 
 	/*
 	 * qpshift is the number of bits to shift the qpid left in order
@@ -1064,8 +993,8 @@
 					      PAGE_SHIFT));
 	rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
 	rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
-	CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
-	     rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+	CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %p info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
+	     rdev_p->adap, rdev_p->rnic_info.tpt_base,
 	     rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p));
 	CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x",
 	     rdev_p->rnic_info.pbl_base,
@@ -1111,43 +1040,34 @@
 err2:
 	cxio_hal_destroy_ctrl_qp(rdev_p);
 err1:
-	TAILQ_REMOVE(&rdev_list, rdev_p, entry);
 	return err;
 }
 
 void
 cxio_rdev_close(struct cxio_rdev *rdev_p)
 {
-	if (rdev_p) {
-		cxio_hal_pblpool_destroy(rdev_p);
-		cxio_hal_rqtpool_destroy(rdev_p);
-		TAILQ_REMOVE(&rdev_list, rdev_p, entry);
-		rdev_p->t3cdev_p->ulp = NULL;
-		cxio_hal_destroy_ctrl_qp(rdev_p);
-		cxio_hal_destroy_resource(rdev_p->rscp);
-	}
+	cxio_hal_pblpool_destroy(rdev_p);
+	cxio_hal_rqtpool_destroy(rdev_p);
+	cxio_hal_destroy_ctrl_qp(rdev_p);
+	cxio_hal_destroy_resource(rdev_p->rscp);
 }
 
 int
-cxio_hal_init(void)
+cxio_hal_init(struct adapter *sc)
 {
-	TAILQ_INIT(&rdev_list);
 #ifdef needed
 	if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
-		return (-ENOMEM);
+		return (ENOMEM);
 #endif
-	t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
-	return 0;
+	t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+
+	return (0);
 }
 
 void
-cxio_hal_exit(void)
+cxio_hal_uninit(struct adapter *sc)
 {
-	struct cxio_rdev *rdev, *tmp;
-
-	t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
-	TAILQ_FOREACH_SAFE(rdev, &rdev_list, entry, tmp)
-		cxio_rdev_close(rdev);
+	t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, NULL);
 #ifdef needed
 	cxio_hal_destroy_rhdl_resource();
 #endif
@@ -1304,11 +1224,12 @@
 		}
 
 		/* incoming SEND with no receive posted failures */
-		if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+		if (CQE_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
 		    Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
 			ret = -1;
 			goto skip_cqe;
 		}
+		PANIC_IF((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
 		goto proc_cqe;
 	}
 
@@ -1323,6 +1244,13 @@
 		 * then we complete this with TPT_ERR_MSN and mark the wq in
 		 * error.
 		 */
+		
+		if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+			wq->error = 1;
+			ret = -1;
+			goto skip_cqe;
+		}
+
 		if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
 			wq->error = 1;
 			hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
@@ -1367,13 +1295,17 @@
 		wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
 		CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__,
 		     Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
-		*cookie = (wq->sq +
-			   Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+		*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
 		wq->sq_rptr++;
 	} else {
 		CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__,
 		     Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
-		*cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+		*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
+		if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
+                        cxio_hal_pblpool_free(wq->rdev,
+                                wq->rq[Q_PTR2IDX(wq->rq_rptr,
+                                wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
+		PANIC_IF(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
 		wq->rq_rptr++;
 	}
 
@@ -1404,5 +1336,4 @@
 	}
 	return ret;
 }
-
-
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,7 +25,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 #ifndef  __CXIO_HAL_H__
@@ -45,7 +45,11 @@
 #define T3_MAX_NUM_PD (1<<15)
 #define T3_MAX_PBL_SIZE 256
 #define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
+#define T3_MAX_CQ_DEPTH 65536
 #define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
+#define T3_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
 
 #define T3_STAG_UNSET 0xffffffff
 
@@ -55,12 +59,9 @@
 	u32 wptr;
 	u32 rptr;
 	struct mtx lock;	/* for the wtpr, can sleep */
-#ifdef notyet
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-#endif	
 	union t3_wr *workq;	/* the work request queue */
 	bus_addr_t dma_addr;	/* pci bus address of the workq */
-	void /* __iomem */ *doorbell;
+	void *doorbell;
 };
 
 struct cxio_hal_resource {
@@ -85,13 +86,10 @@
 };
 
 struct cxio_rdev {
-	char dev_name[T3_MAX_DEV_NAME_LEN];
-	struct t3cdev *t3cdev_p;
+	struct adapter *adap;
 	struct rdma_info rnic_info;
-	struct adap_ports port_info;
 	struct cxio_hal_resource *rscp;
 	struct cxio_hal_ctrl_qp ctrl_qp;
-	void *ulp;
 	unsigned long qpshift;
 	u32 qpnr;
 	u32 qpmask;
@@ -139,9 +137,8 @@
 void cxio_rdev_close(struct cxio_rdev *rdev);
 int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
 		   enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
 int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
 void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
 int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
@@ -149,27 +146,27 @@
 int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
 		    struct cxio_ucontext *uctx);
 int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+		   u32 pbl_addr, u32 pbl_size);
 int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr);
+			   u8 page_size, u32 pbl_size, u32 pbl_addr);
 int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr);
+			   u8 page_size, u32 pbl_size, u32 pbl_addr);
 int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
 		   u32 pbl_addr);
 int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
 int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr,
+    struct socket *so);
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int cxio_hal_init(void);
+int cxio_hal_init(struct adapter *);
+void cxio_hal_uninit(struct adapter *);
 void cxio_hal_exit(void);
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
 void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
 void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
 void cxio_flush_hw_cq(struct t3_cq *cq);
@@ -178,7 +175,7 @@
 
 #define MOD "iw_cxgb: "
 
-#ifdef DEBUG
+#ifdef INVARIANTS
 void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
 void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint32_t len, u8 shift);
 void cxio_dump_wqe(union t3_wr *wqe);
@@ -187,60 +184,7 @@
 void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
 #endif
 
-
- static unsigned char hiBitSetTab[] = {
-    0, 1, 2, 2, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5,
-    6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7
-
-};
-
-
-static __inline
-int ilog2(unsigned long val)
-{
-    unsigned long   tmp;
-
-    tmp = val >> 24;
-    if (tmp) {
-        return hiBitSetTab[tmp] + 23;
-    }
-    tmp = (val >> 16) & 0xff;
-    if (tmp) {
-        return hiBitSetTab[tmp] + 15;
-    }
-    tmp = (val >> 8) & 0xff;
-    if (tmp) {
-        return hiBitSetTab[tmp] + 7;
-
-    }
-    return hiBitSetTab[val & 0xff] - 1;
-} 
-
 #define cxfree(a) free((a), M_DEVBUF);
-#define kmalloc(a, b) malloc((a), M_DEVBUF, (b))
-#define kzalloc(a, b) malloc((a), M_DEVBUF, (b)|M_ZERO)
-
-static __inline __attribute__((const))
-unsigned long roundup_pow_of_two(unsigned long n)
-{
-	return 1UL << flsl(n - 1);
-}
-
-#define PAGE_ALIGN(x) roundup2((x), PAGE_SIZE)
 
 #include <sys/blist.h>
 struct gen_pool {
@@ -259,6 +203,7 @@
 	if (gp == NULL)
 		return (NULL);
 	
+	memset(gp, 0, sizeof(struct gen_pool));
 	gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT);
 	if (gp->gen_list == NULL) {
 		free(gp, M_DEVBUF);
@@ -323,8 +268,7 @@
 	mtx_unlock(lockp); \
 	__ret; \
 }) 
-extern struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev);
 
-#define KTR_IW_CXGB KTR_SPARE4
+#define KTR_IW_CXGB KTR_SPARE3
 
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h	Wed Jul 25 17:04:43 2012 +0300
@@ -0,0 +1,22 @@
+#ifndef  __IB_INTFC_H__
+#define  __IB_INTFC_H__
+
+/* $FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h 237263 2012-06-19 07:34:13Z np $ */
+
+#undef prefetch
+#undef WARN_ON
+#undef max_t
+#undef udelay
+#undef le32_to_cpu
+#undef le16_to_cpu
+#undef cpu_to_le32
+#undef swab32
+#undef container_of
+
+#undef LIST_HEAD
+#define LIST_HEAD(name, type)                                           \
+struct name {                                                           \
+        struct type *lh_first;  /* first element */                     \
+}
+
+#endif /* __IB_INTFC_H__ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -59,9 +61,11 @@
 
 #include <netinet/in.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -72,14 +76,24 @@
 #include <ulp/iw_cxgb/iw_cxgb_resource.h>
 #include <ulp/iw_cxgb/iw_cxgb_user.h>
 
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+{
+	u32 mmid;
+
+	mhp->attr.state = 1;
+	mhp->attr.stag = stag;
+	mmid = stag >> 8;
+	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+	CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __func__, mmid, mhp);
+	return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+}
+
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 					struct iwch_mr *mhp,
-					int shift,
-					__be64 *page_list)
+					int shift)
 {
 	u32 stag;
-	u32 mmid;
-
+	int ret;
 
 	if (cxio_register_phys_mem(&rhp->rdev,
 				   &stag, mhp->attr.pdid,
@@ -87,28 +101,24 @@
 				   mhp->attr.zbva,
 				   mhp->attr.va_fbo,
 				   mhp->attr.len,
-				   shift-12,
-				   page_list,
-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+				   shift - 12,
+				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return (-ENOMEM);
-	mhp->attr.state = 1;
-	mhp->attr.stag = stag;
-	mmid = stag >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
-	return 0;
+
+	ret = iwch_finish_mem_reg(mhp, stag);
+	if (ret)
+		cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+			mhp->attr.pbl_addr);
+	return ret;
 }
 
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 					struct iwch_mr *mhp,
 					int shift,
-					__be64 *page_list,
 					int npages)
 {
 	u32 stag;
-	u32 mmid;
-
+	int ret;
 
 	/* We could support this... */
 	if (npages > mhp->attr.pbl_size)
@@ -121,17 +131,40 @@
 				   mhp->attr.zbva,
 				   mhp->attr.va_fbo,
 				   mhp->attr.len,
-				   shift-12,
-				   page_list,
-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+				   shift - 12,
+				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return (-ENOMEM);
-	mhp->attr.state = 1;
-	mhp->attr.stag = stag;
-	mmid = stag >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
+	
+	ret = iwch_finish_mem_reg(mhp, stag);
+        if (ret)
+                cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+                        mhp->attr.pbl_addr);
+        return ret;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+	mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+						    npages << 3);
+
+	if (!mhp->attr.pbl_addr)
+		return -ENOMEM;
+
+	mhp->attr.pbl_size = npages;
+
 	return 0;
+ }
+
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+	cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+			      mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+	return cxio_write_pbl(&mhp->rhp->rdev, pages,
+			      mhp->attr.pbl_addr + (offset << 3), npages);
 }
 
 int build_phys_page_list(struct ib_phys_buf *buffer_list,
@@ -204,3 +237,4 @@
 	return 0;
 
 }
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -62,9 +64,12 @@
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
+
 
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -180,6 +185,8 @@
 	struct iwch_create_cq_resp uresp;
 	struct iwch_create_cq_req ureq;
 	struct iwch_ucontext *ucontext = NULL;
+	static int warned;
+	size_t resplen;
 
 	CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
 	rhp = to_iwch_dev(ibdev);
@@ -214,7 +221,7 @@
 	entries = roundup_pow_of_two(entries);
 	chp->cq.size_log2 = ilog2(entries);
 
-	if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+	if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
 		cxfree(chp);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -222,7 +229,11 @@
 	chp->ibcq.cqe = 1 << chp->cq.size_log2;
 	mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK);
 	chp->refcnt = 1;
-	insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+	if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+		cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+		cxfree(chp);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	if (ucontext) {
 		struct iwch_mm_entry *mm;
@@ -238,15 +249,27 @@
 		uresp.key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
 		mtx_unlock(&ucontext->mmap_lock);
-		if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+		mm->key = uresp.key;
+		mm->addr = vtophys(chp->cq.queue);
+               	if (udata->outlen < sizeof uresp) {
+                	if (!warned++)
+                        	CTR1(KTR_IW_CXGB, "%s Warning - "
+                                	"downlevel libcxgb3 (non-fatal).\n",
+					__func__);
+                       	mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+                       				sizeof(struct t3_cqe));
+                       	resplen = sizeof(struct iwch_create_cq_resp_v0);
+               	} else {
+                	mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
+                        			sizeof(struct t3_cqe));
+                       	uresp.memsize = mm->len;
+                      	resplen = sizeof uresp;
+               	}
+              	if (ib_copy_to_udata(udata, &uresp, resplen)) {
 			cxfree(mm);
 			iwch_destroy_cq(&chp->ibcq);
 			return ERR_PTR(-EFAULT);
 		}
-		mm->key = uresp.key;
-		mm->addr = vtophys(chp->cq.queue);
-		mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
-					     sizeof (struct t3_cqe));
 		insert_mmap(ucontext, mm);
 	}
 	CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx",
@@ -256,72 +279,11 @@
 }
 
 static int
-iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+iwch_resize_cq(struct ib_cq *cq __unused, int cqe __unused,
+    struct ib_udata *udata __unused)
 {
-#ifdef notyet
-	struct iwch_cq *chp = to_iwch_cq(cq);
-	struct t3_cq oldcq, newcq;
-	int ret;
 
-	CTR3(KTR_IW_CXGB, "%s ib_cq %p cqe %d", __FUNCTION__, cq, cqe);
-
-	/* We don't downsize... */
-	if (cqe <= cq->cqe)
-		return 0;
-
-	/* create new t3_cq with new size */
-	cqe = roundup_pow_of_two(cqe+1);
-	newcq.size_log2 = ilog2(cqe);
-
-	/* Dont allow resize to less than the current wce count */
-	if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
-		return (-ENOMEM);
-	}
-
-	/* Quiesce all QPs using this CQ */
-	ret = iwch_quiesce_qps(chp);
-	if (ret) {
-		return (ret);
-	}
-
-	ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
-	if (ret) {
-		return (ret);
-	}
-
-	/* copy CQEs */
-	memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
-				        sizeof(struct t3_cqe));
-
-	/* old iwch_qp gets new t3_cq but keeps old cqid */
-	oldcq = chp->cq;
-	chp->cq = newcq;
-	chp->cq.cqid = oldcq.cqid;
-
-	/* resize new t3_cq to update the HW context */
-	ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
-	if (ret) {
-		chp->cq = oldcq;
-		return ret;
-	}
-	chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
-
-	/* destroy old t3_cq */
-	oldcq.cqid = newcq.cqid;
-	ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
-	if (ret) {
-		log(LOG_ERR, "%s - cxio_destroy_cq failed %d\n",
-			__FUNCTION__, ret);
-	}
-
-	/* add user hooks here */
-
-	/* resume qps */
-	ret = iwch_resume_qps(chp);
-	return ret;
-#else
 	return (-ENOSYS);
-#endif
 }
 
 static int
@@ -357,67 +319,12 @@
 	return err;
 }
 
-#ifdef notyet
 static int
-iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+iwch_mmap(struct ib_ucontext *context __unused, struct vm_area_struct *vma __unused)
 {
-#ifdef notyet	
-	int len = vma->vm_end - vma->vm_start;
-	u32 key = vma->vm_pgoff << PAGE_SHIFT;
-	struct cxio_rdev *rdev_p;
-	int ret = 0;
-	struct iwch_mm_entry *mm;
-	struct iwch_ucontext *ucontext;
-	u64 addr;
 
-	CTR4(KTR_IW_CXGB, "%s pgoff 0x%lx key 0x%x len %d", __FUNCTION__, vma->vm_pgoff,
-	     key, len);
-
-	if (vma->vm_start & (PAGE_SIZE-1)) {
-	        return (-EINVAL);
-	}
-
-	rdev_p = &(to_iwch_dev(context->device)->rdev);
-	ucontext = to_iwch_ucontext(context);
-
-	mm = remove_mmap(ucontext, key, len);
-	if (!mm)
-		return (-EINVAL);
-	addr = mm->addr;
-	cxfree(mm);
-
-	if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
-	    (addr < (rdev_p->rnic_info.udbell_physbase +
-		       rdev_p->rnic_info.udbell_len))) {
-
-		/*
-		 * Map T3 DB register.
-		 */
-		if (vma->vm_flags & VM_READ) {
-			return (-EPERM);
-		}
-
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-		vma->vm_flags &= ~VM_MAYREAD;
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 addr >> PAGE_SHIFT,
-				         len, vma->vm_page_prot);
-	} else {
-
-		/*
-		 * Map WQ or CQ contig dma memory...
-		 */
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      addr >> PAGE_SHIFT,
-				      len, vma->vm_page_prot);
-	}
-
-	return ret;
-#endif
-	return (0);
+	return (-ENOSYS);
 }
-#endif
 
 static int iwch_deallocate_pd(struct ib_pd *pd)
 {
@@ -470,7 +377,7 @@
 
 	CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr);
 	/* There can be no memory windows */
-	if (atomic_load_acq_int(&ib_mr->usecnt))
+	if (atomic_load_acq_int(&ib_mr->usecnt.counter))
 		return (-EINVAL);
 
 	mhp = to_iwch_mr(ib_mr);
@@ -478,6 +385,7 @@
 	mmid = mhp->attr.stag >> 8;
 	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
 		       mhp->attr.pbl_addr);
+	iwch_free_pbl(mhp);
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		cxfree((void *) (unsigned long) mhp->kva);
@@ -511,6 +419,8 @@
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->rhp = rhp;
+
 	/* First check that we have enough alignment */
 	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
 		ret = -EINVAL;
@@ -528,7 +438,17 @@
 	if (ret)
 		goto err;
 
-	mhp->rhp = rhp;
+	ret = iwch_alloc_pbl(mhp, npages);
+	if (ret) {
+		cxfree(page_list);
+		goto err_pbl;
+	}
+
+	ret = iwch_write_pbl(mhp, page_list, npages, 0);
+	cxfree(page_list);
+	if (ret)
+		goto err;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 
@@ -538,15 +458,18 @@
 
 	mhp->attr.len = (u32) total_size;
 	mhp->attr.pbl_size = npages;
-	ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
-	cxfree(page_list);
-	if (ret) {
-		goto err;
-	}
+	ret = iwch_register_mem(rhp, php, mhp, shift);
+	if (ret)
+		goto err_pbl;
+
 	return &mhp->ibmr;
+
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	cxfree(mhp);
-	return ERR_PTR(-ret);
+	return ERR_PTR(ret);
 
 }
 
@@ -570,7 +493,7 @@
 	CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd);
 
 	/* There can be no memory windows */
-	if (atomic_load_acq_int(&mr->usecnt))
+	if (atomic_load_acq_int(&mr->usecnt.counter))
 		return (-EINVAL);
 
 	mhp = to_iwch_mr(mr);
@@ -596,7 +519,7 @@
 			return ret;
 	}
 
-	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+	ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
 	cxfree(page_list);
 	if (ret) {
 		return ret;
@@ -640,7 +563,9 @@
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
-	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	mhp->rhp = rhp;
+
+	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
 	if (IS_ERR(mhp->umem)) {
 		err = PTR_ERR(mhp->umem);
 		cxfree(mhp);
@@ -650,18 +575,22 @@
 	shift = ffs(mhp->umem->page_size) - 1;
 
 	n = 0;
-	TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
+	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		n += chunk->nents;
 
-	pages = kmalloc(n * sizeof(u64), M_NOWAIT);
+	err = iwch_alloc_pbl(mhp, n);
+	if (err)
+		goto err;
+
+	pages = (__be64 *) kmalloc(n * sizeof(u64), M_NOWAIT);
 	if (!pages) {
 		err = -ENOMEM;
-		goto err;
+		goto err_pbl;
 	}
 
 	i = n = 0;
 
-#if 0	
+#ifdef notyet
 	TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -669,21 +598,36 @@
 				pages[i++] = htobe64(sg_dma_address(
 					&chunk->page_list[j]) +
 					mhp->umem->page_size * k);
+				if (i == PAGE_SIZE / sizeof *pages) {
+					err = iwch_write_pbl(mhp, pages, i, n);
+					if (err)
+						goto pbl_done;
+					n += i;
+					i = 0;
+				}
 			}
 		}
 #endif
-	mhp->rhp = rhp;
+
+	if (i)
+		err = iwch_write_pbl(mhp, pages, i, n);
+#ifdef notyet
+pbl_done:
+#endif
+	cxfree(pages);
+	if (err)
+		goto err_pbl;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
 	mhp->attr.len = (u32) length;
-	mhp->attr.pbl_size = i;
-	err = iwch_register_mem(rhp, php, mhp, shift, pages);
-	cxfree(pages);
+	
+	err = iwch_register_mem(rhp, php, mhp, shift);
 	if (err)
-		goto err;
+		goto err_pbl;
 
 	if (udata && !t3a_device(rhp)) {
 		uresp.pbl_addr = (mhp->attr.pbl_addr -
@@ -700,6 +644,9 @@
 
 	return &mhp->ibmr;
 
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	ib_umem_release(mhp->umem);
 	cxfree(mhp);
@@ -748,7 +695,12 @@
 	mhp->attr.type = TPT_MW;
 	mhp->attr.stag = stag;
 	mmid = (stag) >> 8;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	mhp->ibmw.rkey = stag;
+	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+		cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+		cxfree(mhp);
+		return ERR_PTR(-ENOMEM);
+	}	
 	CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag);
 	return &(mhp->ibmw);
 }
@@ -893,7 +845,13 @@
 
 	mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK);
 	qhp->refcnt = 1;
-	insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+	if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+		cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		cxfree(qhp);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	if (udata) {
 
@@ -1023,12 +981,14 @@
 {
 	struct iwch_dev *dev;
 	struct port_info *pi;
+	struct adapter *sc;
 
 	CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p",
 	       __FUNCTION__, ibdev, port, index, gid);
 	dev = to_iwch_dev(ibdev);
+	sc = dev->rdev.adap;
 	PANIC_IF(port == 0 || port > 2);
-	pi = ((struct port_info *)dev->rdev.port_info.lldevs[port-1]->if_softc);
+	pi = &sc->port[port - 1];
 	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
 	memcpy(&(gid->raw[0]), pi->hw_addr, 6);
 	return 0;
@@ -1037,21 +997,20 @@
 static int iwch_query_device(struct ib_device *ibdev,
 			     struct ib_device_attr *props)
 {
+	struct iwch_dev *dev;
+	struct adapter *sc;
 
-	struct iwch_dev *dev;
 	CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
 
 	dev = to_iwch_dev(ibdev);
+	sc = dev->rdev.adap;
 	memset(props, 0, sizeof *props);
-#ifdef notyet	
-	memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->if_addr.ifa_addr, 6);
-#endif	
+	memcpy(&props->sys_image_guid, sc->port[0].hw_addr, 6);
 	props->device_cap_flags = dev->device_cap_flags;
-#ifdef notyet
-	props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
-	props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
-#endif
-	props->max_mr_size = ~0ull;
+	props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
+	props->vendor_id = pci_get_vendor(sc->dev);
+	props->vendor_part_id = pci_get_device(sc->dev);
+	props->max_mr_size = dev->attr.max_mr_size;
 	props->max_qp = dev->attr.max_qps;
 	props->max_qp_wr = dev->attr.max_wrs;
 	props->max_sge = dev->attr.max_sge_per_wr;
@@ -1071,13 +1030,10 @@
 			   u8 port, struct ib_port_attr *props)
 {
 	CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+	memset(props, 0, sizeof(struct ib_port_attr));
 	props->max_mtu = IB_MTU_4096;
-	props->lid = 0;
-	props->lmc = 0;
-	props->sm_lid = 0;
-	props->sm_sl = 0;
+	props->active_mtu = IB_MTU_2048;
 	props->state = IB_PORT_ACTIVE;
-	props->phys_state = 0;
 	props->port_cap_flags =
 	    IB_PORT_CM_SUP |
 	    IB_PORT_SNMP_TUNNEL_SUP |
@@ -1086,7 +1042,6 @@
 	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
 	props->gid_tbl_len = 1;
 	props->pkey_tbl_len = 1;
-	props->qkey_viol_cntr = 0;
 	props->active_width = 2;
 	props->active_speed = 2;
 	props->max_msg_sz = -1;
@@ -1094,80 +1049,18 @@
 	return 0;
 }
 
-#ifdef notyet
-static ssize_t show_rev(struct class_device *cdev, char *buf)
-{
-	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
-					    ibdev.class_dev);
-	CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
-	return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
-}
-
-static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
-{
-	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
-					    ibdev.class_dev);
-	struct ethtool_drvinfo info;
-	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
-
-	CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
-	lldev->ethtool_ops->get_drvinfo(lldev, &info);
-	return sprintf(buf, "%s\n", info.fw_version);
-}
-
-static ssize_t show_hca(struct class_device *cdev, char *buf)
-{
-	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
-					    ibdev.class_dev);
-	struct ethtool_drvinfo info;
-	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
-
-	CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
-	lldev->ethtool_ops->get_drvinfo(lldev, &info);
-	return sprintf(buf, "%s\n", info.driver);
-}
-
-static ssize_t show_board(struct class_device *cdev, char *buf)
-{
-	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
-					    ibdev.class_dev);
-	CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, dev);
-#ifdef notyet
-	return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
-		                       dev->rdev.rnic_info.pdev->device);
-#else
-	return sprintf(buf, "%x.%x\n", 0xdead, 0xbeef);	 /* XXX */
-#endif
-}
-
-static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct class_device_attribute *iwch_class_attributes[] = {
-	&class_device_attr_hw_rev,
-	&class_device_attr_fw_ver,
-	&class_device_attr_hca_type,
-	&class_device_attr_board_id
-};
-#endif
-
 int iwch_register_device(struct iwch_dev *dev)
 {
 	int ret;
-#ifdef notyet	
-	int i;
-#endif
+	struct adapter *sc = dev->rdev.adap;
+
 	CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
 	strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
 	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-#ifdef notyet	
-	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
-#endif	
+	memcpy(&dev->ibdev.node_guid, sc->port[0].hw_addr, 6);
 	dev->device_cap_flags =
-	    (IB_DEVICE_ZERO_STAG |
-	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+		(IB_DEVICE_LOCAL_DMA_LKEY |
+		 IB_DEVICE_MEM_WINDOW);
 
 	dev->ibdev.uverbs_cmd_mask =
 	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1189,9 +1082,9 @@
 	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
 	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
-	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+	dev->ibdev.phys_port_cnt = sc->params.nports;
 	dev->ibdev.num_comp_vectors = 1;
-	dev->ibdev.dma_device = dev->rdev.rnic_info.pdev;
+	dev->ibdev.dma_device = dev->rdev.adap->dev;
 	dev->ibdev.query_device = iwch_query_device;
 	dev->ibdev.query_port = iwch_query_port;
 	dev->ibdev.modify_port = iwch_modify_port;
@@ -1199,9 +1092,7 @@
 	dev->ibdev.query_gid = iwch_query_gid;
 	dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
 	dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
-#ifdef notyet	
 	dev->ibdev.mmap = iwch_mmap;
-#endif	
 	dev->ibdev.alloc_pd = iwch_allocate_pd;
 	dev->ibdev.dealloc_pd = iwch_deallocate_pd;
 	dev->ibdev.create_ah = iwch_ah_create;
@@ -1229,11 +1120,13 @@
 	dev->ibdev.req_notify_cq = iwch_arm_cq;
 	dev->ibdev.post_send = iwch_post_send;
 	dev->ibdev.post_recv = iwch_post_receive;
-
+	dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
 
 	dev->ibdev.iwcm =
-	    (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
-					   M_NOWAIT);
+	    kmalloc(sizeof(struct iw_cm_verbs), M_NOWAIT);
+	if (!dev->ibdev.iwcm)
+		return (ENOMEM);
+
 	dev->ibdev.iwcm->connect = iwch_connect;
 	dev->ibdev.iwcm->accept = iwch_accept_cr;
 	dev->ibdev.iwcm->reject = iwch_reject_cr;
@@ -1246,35 +1139,19 @@
 	ret = ib_register_device(&dev->ibdev);
 	if (ret)
 		goto bail1;
-#ifdef notyet
-	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
-		ret = class_device_create_file(&dev->ibdev.class_dev,
-					       iwch_class_attributes[i]);
-		if (ret) {
-			goto bail2;
-		}
-	}
-#endif	
-	return 0;
-#ifdef notyet	
-bail2:
-#endif	
-	ib_unregister_device(&dev->ibdev);
+
+	return (0);
+
 bail1:
-	return ret;
+	cxfree(dev->ibdev.iwcm);
+	return (ret);
 }
 
 void iwch_unregister_device(struct iwch_dev *dev)
 {
-#ifdef notyet
-	int i;
 
-	CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
-
-	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
-		class_device_remove_file(&dev->ibdev.class_dev,
-					 iwch_class_attributes[i]);
-#endif	
 	ib_unregister_device(&dev->ibdev);
+	cxfree(dev->ibdev.iwcm);
 	return;
 }
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,13 +25,13 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 #ifndef __IWCH_PROVIDER_H__
 #define __IWCH_PROVIDER_H__
 
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
 
 struct iwch_pd {
 	struct ib_pd ibpd;
@@ -116,6 +116,7 @@
 };
 
 struct iwch_mpa_attributes {
+	u8 initiator;
 	u8 recv_marker_enabled;
 	u8 xmit_marker_enabled;	/* iWARP: enable inbound Read Resp. */
 	u8 crc_enabled;
@@ -336,18 +337,17 @@
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_register_device(struct iwch_dev *dev);
 void iwch_unregister_device(struct iwch_dev *dev);
-int iwch_quiesce_qps(struct iwch_cq *chp);
-int iwch_resume_qps(struct iwch_cq *chp);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 					struct iwch_mr *mhp,
-					int shift,
-					__be64 *page_list);
+					int shift);
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 					struct iwch_mr *mhp,
 					int shift,
-					__be64 *page_list,
 					int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
 int build_phys_page_list(struct ib_phys_buf *buffer_list,
 					int num_phys_buf,
 					u64 *iova_start,
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,4 +1,3 @@
-
 /**************************************************************************
 
 Copyright (c) 2007, Chelsio Inc.
@@ -28,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -48,6 +49,7 @@
 #include <sys/linker.h>
 #include <sys/firmware.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -57,14 +59,26 @@
 #include <sys/proc.h>
 #include <sys/queue.h>
 
+#include <net/route.h>
+#include <netinet/in_systm.h>
 #include <netinet/in.h>
+#include <netinet/toecore.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/tom/cxgb_l2t.h>
+#include <ulp/tom/cxgb_toepcb.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
 #include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -75,7 +89,7 @@
 
 #define NO_SUPPORT -1
 
-static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
 				u8 * flit_cnt)
 {
 	int i;
@@ -83,59 +97,46 @@
 
 	switch (wr->opcode) {
 	case IB_WR_SEND:
-	case IB_WR_SEND_WITH_IMM:
 		if (wr->send_flags & IB_SEND_SOLICITED)
 			wqe->send.rdmaop = T3_SEND_WITH_SE;
 		else
 			wqe->send.rdmaop = T3_SEND;
 		wqe->send.rem_stag = 0;
 		break;
-#if 0				/* Not currently supported */
-	case TYPE_SEND_INVALIDATE:
-	case TYPE_SEND_INVALIDATE_IMMEDIATE:
-		wqe->send.rdmaop = T3_SEND_WITH_INV;
-		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
+	case IB_WR_SEND_WITH_IMM:
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+		else
+			wqe->send.rdmaop = T3_SEND_WITH_INV;
+		wqe->send.rem_stag = 0;
 		break;
-	case TYPE_SEND_SE_INVALIDATE:
-		wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
-		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
-		break;
-#endif
 	default:
-		break;
+		return -EINVAL;
 	}
 	if (wr->num_sge > T3_MAX_SGE)
 		return (-EINVAL);
 	wqe->send.reserved[0] = 0;
 	wqe->send.reserved[1] = 0;
 	wqe->send.reserved[2] = 0;
-	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
-		plen = 4;
-		wqe->send.sgl[0].stag = wr->imm_data;
-		wqe->send.sgl[0].len = 0;
-		wqe->send.num_sgle = 0;
-		*flit_cnt = 5;
-	} else {
-		plen = 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			if ((plen + wr->sg_list[i].length) < plen) {
-				return (-EMSGSIZE);
-			}
-			plen += wr->sg_list[i].length;
-			wqe->send.sgl[i].stag =
-			    htobe32(wr->sg_list[i].lkey);
-			wqe->send.sgl[i].len =
-			    htobe32(wr->sg_list[i].length);
-			wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
+	plen = 0;
+	for (i = 0; i < wr->num_sge; i++) {
+		if ((plen + wr->sg_list[i].length) < plen) {
+			return (-EMSGSIZE);
 		}
-		wqe->send.num_sgle = htobe32(wr->num_sge);
-		*flit_cnt = 4 + ((wr->num_sge) << 1);
+		plen += wr->sg_list[i].length;
+		wqe->send.sgl[i].stag =
+		    htobe32(wr->sg_list[i].lkey);
+		wqe->send.sgl[i].len =
+		    htobe32(wr->sg_list[i].length);
+		wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
 	}
+	wqe->send.num_sgle = htobe32(wr->num_sge);
+	*flit_cnt = 4 + ((wr->num_sge) << 1);
 	wqe->send.plen = htobe32(plen);
 	return 0;
 }
 
-static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 				 u8 *flit_cnt)
 {
 	int i;
@@ -152,7 +153,7 @@
 
 	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
 		plen = 4;
-		wqe->write.sgl[0].stag = wr->imm_data;
+		wqe->write.sgl[0].stag = wr->ex.imm_data;
 		wqe->write.sgl[0].len = 0;
 		wqe->write.num_sgle = 0; 
 		*flit_cnt = 6;
@@ -177,7 +178,7 @@
 	return 0;
 }
 
-static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
 				u8 *flit_cnt)
 {
 	if (wr->num_sge > 1)
@@ -195,15 +196,12 @@
 	return 0;
 }
 
-/*
- * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
- */
 static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
 			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
 {
 	int i;
 	struct iwch_mr *mhp;
-	u32 offset;
+	u64 offset;
 	for (i = 0; i < num_sgle; i++) {
 
 		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
@@ -235,8 +233,8 @@
 			return (-EINVAL);
 		}
 		offset = sg_list[i].addr - mhp->attr.va_fbo;
-		offset += ((u32) mhp->attr.va_fbo) %
-		          (1UL << (12 + mhp->attr.page_size));
+		offset += mhp->attr.va_fbo &
+			  ((1UL << (12 + mhp->attr.page_size)) - 1);
 		pbl_addr[i] = ((mhp->attr.pbl_addr -
 			        rhp->rdev.rnic_info.pbl_base) >> 3) +
 			      (offset >> (12 + mhp->attr.page_size));
@@ -245,26 +243,113 @@
 	return 0;
 }
 
-static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
 				struct ib_recv_wr *wr)
 {
-	int i;
-	if (wr->num_sge > T3_MAX_SGE)
+       int i, err = 0;
+       u32 pbl_addr[T3_MAX_SGE];
+       u8 page_size[T3_MAX_SGE];
+
+       if (wr->num_sge > T3_MAX_SGE)
 		return (-EINVAL);
+
+
+        err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
+                               page_size);
+        if (err)
+                return err;
+        wqe->recv.pagesz[0] = page_size[0];
+        wqe->recv.pagesz[1] = page_size[1];
+        wqe->recv.pagesz[2] = page_size[2];
+        wqe->recv.pagesz[3] = page_size[3];
 	wqe->recv.num_sgle = htobe32(wr->num_sge);
+
 	for (i = 0; i < wr->num_sge; i++) {
 		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
 		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
-		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
+		wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
+				((1UL << (12 + page_size[i])) - 1));
+		/* pbl_addr is the adapters address in the PBL */
+		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
 	}
 	for (; i < T3_MAX_SGE; i++) {
 		wqe->recv.sgl[i].stag = 0;
 		wqe->recv.sgl[i].len = 0;
 		wqe->recv.sgl[i].to = 0;
+		wqe->recv.pbl_addr[i] = 0;
 	}
+
+        qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                             qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+        qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                             qhp->wq.rq_size_log2)].pbl_addr = 0;
+
 	return 0;
 }
 
+static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+                                struct ib_recv_wr *wr)
+{
+        int i;
+        u32 pbl_addr;
+        u32 pbl_offset;
+
+
+        /*
+         * The T3 HW requires the PBL in the HW recv descriptor to reference
+         * a PBL entry.  So we allocate the max needed PBL memory here and pass
+         * it to the uP in the recv WR.  The uP will build the PBL and setup
+         * the HW recv descriptor.
+         */
+        pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
+        if (!pbl_addr)
+                return -ENOMEM;
+
+        /*
+         * Compute the 8B aligned offset.
+         */
+        pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
+
+        wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+
+        for (i = 0; i < wr->num_sge; i++) {
+
+                /*
+                 * Use a 128MB page size. This and an imposed 128MB
+                 * sge length limit allows us to require only a 2-entry HW
+                 * PBL for each SGE.  This restriction is acceptable since
+                 * since it is not possible to allocate 128MB of contiguous
+                 * DMA coherent memory!
+                 */
+                if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
+                        return -EINVAL;
+                wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
+
+                /*
+                 * T3 restricts a recv to all zero-stag or all non-zero-stag.
+                 */
+                if (wr->sg_list[i].lkey != 0)
+                        return -EINVAL;
+                wqe->recv.sgl[i].stag = 0;
+                wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
+                wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
+                wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
+                pbl_offset += 2;
+        }
+        for (; i < T3_MAX_SGE; i++) {
+                wqe->recv.pagesz[i] = 0;
+                wqe->recv.sgl[i].stag = 0;
+                wqe->recv.sgl[i].len = 0;
+                wqe->recv.sgl[i].to = 0;
+                wqe->recv.pbl_addr[i] = 0;
+        }
+        qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                             qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+        qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                             qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
+        return 0;
+}
+
 int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr)
 {
@@ -282,18 +367,19 @@
 	mtx_lock(&qhp->lock);
 	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
 		mtx_unlock(&qhp->lock);
-		return (-EINVAL);
+		err = -EINVAL;
+		goto out;
 	}
 	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
 		  qhp->wq.sq_size_log2);
-	if (num_wrs <= 0) {
+	if (num_wrs == 0) {
 		mtx_unlock(&qhp->lock);
-		return (-ENOMEM);
+		err = -EINVAL;
+		goto out;
 	}
 	while (wr) {
 		if (num_wrs == 0) {
 			err = -ENOMEM;
-			*bad_wr = wr;
 			break;
 		}
 		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -311,17 +397,17 @@
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			t3_wr_opcode = T3_WR_SEND;
-			err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+			err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
 			break;
 		case IB_WR_RDMA_WRITE:
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			t3_wr_opcode = T3_WR_WRITE;
-			err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+			err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
 			break;
 		case IB_WR_RDMA_READ:
 			t3_wr_opcode = T3_WR_READ;
 			t3_wr_flags = 0; /* T3 reads are always signaled */
-			err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+			err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
 			if (err)
 				break;
 			sqp->read_len = wqe->read.local_len;
@@ -333,10 +419,9 @@
 			     wr->opcode);
 			err = -EINVAL;
 		}
-		if (err) {
-			*bad_wr = wr;
+		if (err)
 			break;
-		}
+
 		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
 		sqp->wr_id = wr->wr_id;
 		sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -358,6 +443,9 @@
 	}
 	mtx_unlock(&qhp->lock);
 	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+out:
+	if (err)
+		*bad_wr = wr;
 	return err;
 }
 
@@ -374,27 +462,35 @@
 	mtx_lock(&qhp->lock);
 	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
 		mtx_unlock(&qhp->lock);
-		return (-EINVAL);
+		err = -EINVAL;
+		goto out;
 	}
 	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
 			    qhp->wq.rq_size_log2) - 1;
 	if (!wr) {
 		mtx_unlock(&qhp->lock);
-		return (-EINVAL);
+		err = -EINVAL;
+		goto out;
 	}
+
 	while (wr) {
+	        if (wr->num_sge > T3_MAX_SGE) {
+                        err = -EINVAL;
+                        break;
+                }
+
 		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
 		wqe = (union t3_wr *) (qhp->wq.queue + idx);
-		if (num_wrs)
-			err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
-		else
+		if (num_wrs) {
+                        if (wr->sg_list[0].lkey)
+                                err = build_rdma_recv(qhp, wqe, wr);
+                        else
+                                err = build_zero_stag_recv(qhp, wqe, wr);
+		} else
 			err = -ENOMEM;
-		if (err) {
-			*bad_wr = wr;
+		if (err)
 			break;
-		}
-		qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
-			wr->wr_id;
+
 		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
 			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
 			       0, sizeof(struct t3_receive_wr) >> 3);
@@ -408,6 +504,9 @@
 	}
 	mtx_unlock(&qhp->lock);
 	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+out:
+        if (err)
+                *bad_wr = wr;
 	return err;
 }
 
@@ -439,7 +538,7 @@
 	}
 	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
 			    qhp->wq.sq_size_log2);
-	if ((num_wrs) <= 0) {
+	if ((num_wrs) == 0) {
 		mtx_unlock(&qhp->lock);
 		return (-ENOMEM);
 	}
@@ -491,7 +590,7 @@
 	return err;
 }
 
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
+static void build_term_codes(struct respQ_msg_t *rsp_msg,
 				    u8 *layer_type, u8 *ecode)
 {
 	int status = TPT_ERR_INTERNAL_ERR;
@@ -631,15 +730,18 @@
 	union t3_wr *wqe;
 	struct terminate_message *term;
 	struct mbuf *m;
+	struct ofld_hdr *oh;
 
-	CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
-	m = m_gethdr(MT_DATA, M_NOWAIT);
-	if (!m) {
+	CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
 		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
 		return (-ENOMEM);
 	}
-	wqe = mtod(m, union t3_wr *);
-	m->m_len = m->m_pkthdr.len = 40;
+	oh = mtod(m, struct ofld_hdr *);
+	m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
+	oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
+	wqe = (void *)(oh + 1);
 	memset(wqe, 0, 40);
 	wqe->send.rdmaop = T3_TERMINATE;
 
@@ -653,22 +755,17 @@
 		V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
 	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));
 
-	m_set_priority(m, CPL_PRIORITY_DATA);
-	m_set_sgl(m, NULL);
-	m_set_sgllen(m, 0);
-	return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
+	return t3_offload_tx(qhp->rhp->rdev.adap, m);
 }
 
 /*
  * Assumes qhp lock is held.
  */
-static void __flush_qp(struct iwch_qp *qhp)
+static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
+			struct iwch_cq *schp)
 {
-	struct iwch_cq *rchp, *schp;
 	int count;
-
-	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
-	schp = get_chp(qhp->rhp, qhp->attr.scq);
+	int flushed;
 
 	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
 	/* take a ref on the qhp since we must release the lock */
@@ -680,20 +777,22 @@
 	mtx_lock(&qhp->lock);
 	cxio_flush_hw_cq(&rchp->cq);
 	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
 	mtx_unlock(&qhp->lock);
 	mtx_unlock(&rchp->lock);
- 	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+	if (flushed)
+ 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
 
 	/* locking hierarchy: cq lock first, then qp lock. */
 	mtx_lock(&schp->lock);
 	mtx_lock(&qhp->lock);
 	cxio_flush_hw_cq(&schp->cq);
 	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
-	cxio_flush_sq(&qhp->wq, &schp->cq, count);
+	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
 	mtx_unlock(&qhp->lock);
 	mtx_unlock(&schp->lock);
- 	(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+	if (flushed)
+ 		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
 
 	/* deref */
 	mtx_lock(&qhp->lock);
@@ -703,10 +802,23 @@
 
 static void flush_qp(struct iwch_qp *qhp)
 {
-	if (qhp->ibqp.uobject)
+	struct iwch_cq *rchp, *schp;
+
+	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+	schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+	if (qhp->ibqp.uobject) {
 		cxio_set_wq_in_error(&qhp->wq);
-	else
-		__flush_qp(qhp);
+		cxio_set_cq_in_error(&rchp->cq);
+               	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+               	if (schp != rchp) {
+                	cxio_set_cq_in_error(&schp->cq);
+                       	(*schp->ibcq.comp_handler)(&schp->ibcq,
+                        				schp->ibcq.cq_context);
+               	}
+               	return;
+       	}
+       	__flush_qp(qhp, rchp, schp);
 }
 
 
@@ -715,7 +827,13 @@
  */
 static int rqes_posted(struct iwch_qp *qhp)
 {
-	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+       union t3_wr *wqe = qhp->wq.queue;
+        u16 count = 0;
+        while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+                count++;
+                wqe++;
+        }
+        return count;
 }
 
 static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -724,6 +842,10 @@
 {
 	struct t3_rdma_init_attr init_attr;
 	int ret;
+	struct socket *so = qhp->ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct toepcb *toep;
 
 	init_attr.tid = qhp->ep->hwtid;
 	init_attr.qpid = qhp->wq.qpid;
@@ -737,32 +859,28 @@
 		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
 		(qhp->attr.mpa_attr.crc_enabled << 2);
 
-	/*
-	 * XXX - The IWCM doesn't quite handle getting these
-	 * attrs set before going into RTS.  For now, just turn
-	 * them on always...
-	 */
-#if 0
-	init_attr.qpcaps = qhp->attr.enableRdmaRead |
-		(qhp->attr.enableRdmaWrite << 1) |
-		(qhp->attr.enableBind << 2) |
-		(qhp->attr.enable_stag0_fastreg << 3) |
-		(qhp->attr.enable_stag0_fastreg << 4);
-#else
-	init_attr.qpcaps = 0x1f;
-#endif
+	init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
+			   uP_RI_QP_RDMA_WRITE_ENABLE |
+			   uP_RI_QP_BIND_ENABLE;
+	if (!qhp->ibqp.uobject)
+		init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
 	init_attr.tcp_emss = qhp->ep->emss;
 	init_attr.ord = qhp->attr.max_ord;
 	init_attr.ird = qhp->attr.max_ird;
 	init_attr.qp_dma_addr = qhp->wq.dma_addr;
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.rqe_count = rqes_posted(qhp);
+	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+	init_attr.rtr_type = 0;
+	tp = intotcpcb(inp);
+	toep = tp->t_toe;
+	init_attr.chan = toep->tp_l2t->smt_idx;
 	init_attr.irs = qhp->ep->rcv_seq;
 	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
 	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
 	     init_attr.rq_addr, init_attr.rq_size,
 	     init_attr.flags, init_attr.qpcaps);
-	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+	ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
 	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
 	return ret;
 }
@@ -870,8 +988,8 @@
 				abort=0;
 				disconnect = 1;
 				ep = qhp->ep;
+				get_ep(&ep->com);
 			}
-			flush_qp(qhp);
 			break;
 		case IWCH_QP_STATE_TERMINATE:
 			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -886,6 +1004,7 @@
 				abort=1;
 				disconnect = 1;
 				ep = qhp->ep;
+				get_ep(&ep->com);
 			}
 			goto err;
 			break;
@@ -901,6 +1020,7 @@
 		}
 		switch (attrs->next_state) {
 			case IWCH_QP_STATE_IDLE:
+				flush_qp(qhp);
 				qhp->attr.state = IWCH_QP_STATE_IDLE;
 				qhp->attr.llp_stream_handle = NULL;
 				put_ep(&qhp->ep->com);
@@ -908,7 +1028,6 @@
 				wakeup(qhp);
 				break;
 			case IWCH_QP_STATE_ERROR:
-				disconnect=1;
 				goto err;
 			default:
 				ret = -EINVAL;
@@ -960,81 +1079,29 @@
 out:
 	mtx_unlock(&qhp->lock);
 
-	if (terminate)
+	if (terminate) 
 		iwch_post_terminate(qhp, NULL);
+	
 
 	/*
 	 * If disconnect is 1, then we need to initiate a disconnect
 	 * on the EP.  This can be a normal close (RTS->CLOSING) or
 	 * an abnormal close (RTS/CLOSING->ERROR).
 	 */
-	if (disconnect)
+	if (disconnect) {
 		iwch_ep_disconnect(ep, abort, M_NOWAIT);
-
+		put_ep(&ep->com);
+	}
+	
 	/*
 	 * If free is 1, then we've disassociated the EP from the QP
 	 * and we need to dereference the EP.
 	 */
-	if (free)
+	if (free) 
 		put_ep(&ep->com);
+	
 
 	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
 	return ret;
 }
-
-static int quiesce_qp(struct iwch_qp *qhp)
-{
-	mtx_lock(&qhp->lock);
-	iwch_quiesce_tid(qhp->ep);
-	qhp->flags |= QP_QUIESCED;
-	mtx_unlock(&qhp->lock);
-	return 0;
-}
-
-static int resume_qp(struct iwch_qp *qhp)
-{
-	mtx_lock(&qhp->lock);
-	iwch_resume_tid(qhp->ep);
-	qhp->flags &= ~QP_QUIESCED;
-	mtx_lock(&qhp->lock);
-	return 0;
-}
-
-int iwch_quiesce_qps(struct iwch_cq *chp)
-{
-	int i;
-	struct iwch_qp *qhp;
-
-	for (i=0; i < T3_MAX_NUM_QP; i++) {
-		qhp = get_qhp(chp->rhp, i);
-		if (!qhp)
-			continue;
-		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
-			quiesce_qp(qhp);
-			continue;
-		}
-		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
-			quiesce_qp(qhp);
-	}
-	return 0;
-}
-
-int iwch_resume_qps(struct iwch_cq *chp)
-{
-	int i;
-	struct iwch_qp *qhp;
-
-	for (i=0; i < T3_MAX_NUM_QP; i++) {
-		qhp = get_qhp(chp->rhp, i);
-		if (!qhp)
-			continue;
-		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
-			resume_qp(qhp);
-			continue;
-		}
-		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
-			resume_qp(qhp);
-	}
-	return 0;
-}
-
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c	Wed Jul 25 17:04:43 2012 +0300
@@ -27,13 +27,15 @@
 
 ***************************************************************************/
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/module.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
@@ -59,9 +61,11 @@
 
 #include <netinet/in.h>
 
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -369,3 +373,4 @@
 {
 	gen_pool_destroy(rdev_p->rqt_pool);
 }
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h	Wed Jul 25 17:04:43 2012 +0300
@@ -29,7 +29,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h 237263 2012-06-19 07:34:13Z np $
  */
 #ifndef __IWCH_USER_H__
 #define __IWCH_USER_H__
@@ -47,10 +47,18 @@
 	uint64_t user_rptr_addr;
 };
 
+struct iwch_create_cq_resp_v0 {
+        __u64 key;
+        __u32 cqid;
+        __u32 size_log2;
+};
+
 struct iwch_create_cq_resp {
 	uint64_t key;
 	uint32_t cqid;
 	uint32_t size_log2;
+	__u32 memsize;
+	__u32 reserved;
 };
 
 struct iwch_create_qp_resp {
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
--- a/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h	Wed Jul 25 17:04:43 2012 +0300
@@ -25,13 +25,16 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 #ifndef __CXIO_WR_H__
 #define __CXIO_WR_H__
 #define T3_MAX_SGE      4
 #define T3_MAX_INLINE	64
+#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
+#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
+#define T3_STAG0_PAGE_SHIFT 15
 
 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
 #define Q_FULL(rptr,wptr,size_log2)  ( (((wptr)-(rptr))>>(size_log2)) && \
@@ -272,6 +275,22 @@
 	uP_RI_QP_STAG0_ENABLE = 0x10
 } __attribute__ ((packed));
 
+enum rdma_init_rtr_types {
+        RTR_READ = 1,
+        RTR_WRITE = 2,
+        RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE      2
+#define M_RTR_TYPE      0x3
+#define V_RTR_TYPE(x)   ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x)   ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
+#define S_CHAN          4
+#define M_CHAN          0x3
+#define V_CHAN(x)       ((x) << S_CHAN)
+#define G_CHAN(x)       ((((x) >> S_CHAN)) & M_CHAN)
+
 struct t3_rdma_init_attr {
 	u32 tid;
 	u32 qpid;
@@ -287,8 +306,11 @@
 	u32 ird;
 	u64 qp_dma_addr;
 	u32 qp_dma_size;
-	u32 flags;
+	enum rdma_init_rtr_types rtr_type;
+	u16 flags;
+	u16 rqe_count;
 	u32 irs;
+	u32 chan;
 };
 
 struct t3_rdma_init_wr {
@@ -303,13 +325,13 @@
 	u8 mpaattrs;		/* 5 */
 	u8 qpcaps;
 	__be16 ulpdu_size;
-	__be32 flags;		/* bits 31-1 - reservered */
-				/* bit     0 - set if RECV posted */
+	__be16 flags_rtr_type;
+        __be16 rqe_count;
 	__be32 ord;		/* 6 */
 	__be32 ird;
 	__be64 qp_dma_addr;	/* 7 */
 	__be32 qp_dma_size;	/* 8 */
-	u32 irs;
+	__be32 irs;
 };
 
 struct t3_genbit {
@@ -318,7 +340,8 @@
 };
 
 enum rdma_init_wr_flags {
-	RECVS_POSTED = 1,
+        MPA_INITIATOR = (1<<0),
+        PRIV_QP = (1<<1),
 };
 
 union t3_wr {
@@ -531,6 +554,12 @@
 #define CQE_STATUS(x)     (G_CQE_STATUS(be32toh((x).header)))
 #define CQE_OPCODE(x)     (G_CQE_OPCODE(be32toh((x).header)))
 
+#define CQE_SEND_OPCODE(x)( \
+	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
+	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
+	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
+	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
+
 #define CQE_LEN(x)        (be32toh((x).len))
 
 /* used for RQ completion processing */
@@ -589,21 +618,23 @@
 	uint64_t		wr_id;
 	struct t3_cqe		cqe;
 	uint32_t		sq_wptr;
-	uint32_t		read_len;
+	__be32   		read_len;
 	int			opcode;
 	int			complete;
 	int			signaled;
 };
 
+struct t3_swrq {
+        __u64                   wr_id;
+        __u32                   pbl_addr;
+};
+
 /*
  * A T3 WQ implements both the SQ and RQ.
  */
 struct t3_wq {
 	union t3_wr *queue;		/* DMA accessable memory */
 	bus_addr_t dma_addr;		/* DMA address for HW */
-#ifdef notyet	
-	DECLARE_PCI_UNMAP_ADDR(mapping)	/* unmap kruft */
-#endif		
 	u32 error;			/* 1 once we go to ERROR */
 	u32 qpid;
 	u32 wptr;			/* idx to next available WR slot */
@@ -613,14 +644,15 @@
 	u32 sq_wptr;			/* sq_wptr - sq_rptr == count of */
 	u32 sq_rptr;			/* pending wrs */
 	u32 sq_size_log2;		/* sq size */
-	u64 *rq;			/* SW RQ (holds consumer wr_ids */
+        struct t3_swrq *rq;             /* SW RQ (holds consumer wr_ids */
 	u32 rq_wptr;			/* rq_wptr - rq_rptr == count of */
 	u32 rq_rptr;			/* pending wrs */
-	u64 *rq_oldest_wr;		/* oldest wr on the SW RQ */
+	struct t3_swrq *rq_oldest_wr;	/* oldest wr on the SW RQ */
 	u32 rq_size_log2;		/* rq size */
 	u32 rq_addr;			/* rq adapter address */
-	void /* __iomem */ *doorbell;	/* kernel db */
+	void *doorbell;			/* kernel db */
 	u64 udb;			/* user db if any */
+	struct cxio_rdev *rdev;
 };
 
 struct t3_cq {
@@ -629,9 +661,6 @@
 	u32 wptr;
 	u32 size_log2;
 	bus_addr_t dma_addr;
-#ifdef notyet	
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-#endif		
 	struct t3_cqe *queue;
 	struct t3_cqe *sw_queue;
 	u32 sw_rptr;
@@ -641,6 +670,22 @@
 #define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
 					 CQE_GENBIT(*cqe))
 
+struct t3_cq_status_page {
+        u32 cq_err;
+};
+
+static inline int cxio_cq_in_error(struct t3_cq *cq)
+{
+        return ((struct t3_cq_status_page *)
+                &cq->queue[1 << cq->size_log2])->cq_err;
+}
+
+static inline void cxio_set_cq_in_error(struct t3_cq *cq)
+{
+        ((struct t3_cq_status_page *)
+         &cq->queue[1 << cq->size_log2])->cq_err = 1;
+}
+
 static inline void cxio_set_wq_in_error(struct t3_wq *wq)
 {
 	wq->queue->flit[13] = 1;
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
--- a/head/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD$
-
-***************************************************************************/
-
-#ifndef _CXGB_TOEDEV_H_
-#define _CXGB_TOEDEV_H_
-#include <netinet/toedev.h>
-
-
-/* offload type ids */
-enum {
-	TOE_ID_CHELSIO_T1 = 1,
-	TOE_ID_CHELSIO_T1C,
-	TOE_ID_CHELSIO_T2,
-	TOE_ID_CHELSIO_T3,
-	TOE_ID_CHELSIO_T3B,
-	TOE_ID_CHELSIO_T3C,
-}
-	;
-
-#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/toecore/toedev.c
--- a/head/sys/dev/cxgb/ulp/toecore/toedev.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,420 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <sys/module.h>
-#include <sys/queue.h>
-#include <sys/mbuf.h>
-#include <sys/proc.h>
-
-#include <sys/socket.h>
-#include <sys/sockio.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/route.h>
-
-
-/*
- * XXX 
- */
-#include <cxgb_include.h>
-#include <ulp/toecore/cxgb_toedev.h>
-
-static struct mtx offload_db_lock;
-static TAILQ_HEAD(, toedev) offload_dev_list;
-static TAILQ_HEAD(, tom_info) offload_module_list;
-
-/*
- * Returns the entry in the given table with the given offload id, or NULL
- * if the id is not found.
- */
-static const struct offload_id *
-id_find(unsigned int id, const struct offload_id *table)
-{
-	for ( ; table->id; ++table)
-		if (table->id == id)
-			return table;
-	return NULL;
-}
-
-/*
- * Returns true if an offload device is presently attached to an offload module.
- */
-static inline int
-is_attached(const struct toedev *dev)
-{
-	return dev->tod_offload_mod != NULL;
-}
-
-/*
- * Try to attach a new offload device to an existing TCP offload module that
- * can handle the device's offload id.  Returns 0 if it succeeds.
- *
- * Must be called with the offload_db_lock held.
- */
-static int
-offload_attach(struct toedev *dev)
-{
-	struct tom_info *t;
-
-	TAILQ_FOREACH(t, &offload_module_list, entry) {
-		const struct offload_id *entry;
-
-		entry = id_find(dev->tod_ttid, t->ti_id_table);
-		if (entry && t->ti_attach(dev, entry) == 0) {
-			dev->tod_offload_mod = t;
-			return 0;
-		}
-	}
-	return (ENOPROTOOPT);
-}
-
-/**
- * register_tom - register a TCP Offload Module (TOM)
- * @t: the offload module to register
- *
- * Register a TCP Offload Module (TOM).
- */
-int
-register_tom(struct tom_info *t)
-{
-	mtx_lock(&offload_db_lock);
-	toedev_registration_count++;
-	TAILQ_INSERT_HEAD(&offload_module_list, t, entry);
-	mtx_unlock(&offload_db_lock);
-	return 0;
-}
-
-/**
- * unregister_tom - unregister a TCP Offload Module (TOM)
- * @t: the offload module to register
- *
- * Unregister a TCP Offload Module (TOM).  Note that this does not affect any
- * TOE devices to which the TOM is already attached.
- */
-int
-unregister_tom(struct tom_info *t)
-{
-	mtx_lock(&offload_db_lock);
-	TAILQ_REMOVE(&offload_module_list, t, entry);
-	mtx_unlock(&offload_db_lock);
-	return 0;
-}
-
-/*
- * Find an offload device by name.  Must be called with offload_db_lock held.
- */
-static struct toedev *
-__find_offload_dev_by_name(const char *name)
-{
-	struct toedev *dev;
-
-	TAILQ_FOREACH(dev, &offload_dev_list, entry) {
-		if (!strncmp(dev->tod_name, name, TOENAMSIZ))
-			return dev;
-	}
-	return NULL;
-}
-
-/*
- * Returns true if an offload device is already registered.
- * Must be called with the offload_db_lock held.
- */
-static int
-is_registered(const struct toedev *dev)
-{
-	struct toedev *d;
-
-	TAILQ_FOREACH(d, &offload_dev_list, entry) {
-		if (d == dev)
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Finalize the name of an offload device by assigning values to any format
- * strings in its name.
- */
-static int
-assign_name(struct toedev *dev, const char *name, int limit)
-{
-	int i;
-
-	for (i = 0; i < limit; ++i) {
-		char s[TOENAMSIZ];
-
-		if (snprintf(s, sizeof(s), name, i) >= sizeof(s))
-			return -1;                  /* name too long */
-		if (!__find_offload_dev_by_name(s)) {
-			strcpy(dev->tod_name, s);
-			return 0;
-		}
-	}
-	return -1;
-}
-
-/**
- * register_toedev - register a TOE device
- * @dev: the device
- * @name: a name template for the device
- *
- * Register a TOE device and try to attach an appropriate TCP offload module
- * to it.  @name is a template that may contain at most one %d format
- * specifier.
- */
-int
-register_toedev(struct toedev *dev, const char *name)
-{
-	int ret;
-	const char *p;
-
-	/*
-	 * Validate the name template.  Only one %d allowed and name must be
-	 * a valid filename so it can appear in sysfs.
-	 */
-	if (!name || !*name || !strcmp(name, ".") || !strcmp(name, "..") ||
-	    strchr(name, '/'))
-		return EINVAL;
-
-	p = strchr(name, '%');
-	if (p && (p[1] != 'd' || strchr(p + 2, '%')))
-		return EINVAL;
-
-	mtx_lock(&offload_db_lock);
-	if (is_registered(dev)) {  /* device already registered */
-		ret = EEXIST;
-		goto out;
-	}
-
-	if ((ret = assign_name(dev, name, 32)) != 0)
-		goto out;
-
-	dev->tod_offload_mod = NULL;
-	TAILQ_INSERT_TAIL(&offload_dev_list, dev, entry);
-out:
-	mtx_unlock(&offload_db_lock);
-	return ret;
-}
-
-/**
- * unregister_toedev - unregister a TOE device
- * @dev: the device
- *
- * Unregister a TOE device.  The device must not be attached to an offload
- * module.
- */
-int
-unregister_toedev(struct toedev *dev)
-{
-	int ret = 0;
-
-	mtx_lock(&offload_db_lock);
-	if (!is_registered(dev)) {
-		ret = ENODEV;
-		goto out;
-	}
-	if (is_attached(dev)) {
-		ret = EBUSY;
-		goto out;
-	}
-	TAILQ_REMOVE(&offload_dev_list, dev, entry);
-out:
-	mtx_unlock(&offload_db_lock);
-	return ret;
-}
-
-/**
- * activate_offload - activate an offload device
- * @dev: the device
- *
- * Activate an offload device by locating an appropriate registered offload
- * module.  If no module is found the operation fails and may be retried at
- * a later time.
- */
-int
-activate_offload(struct toedev *dev)
-{
-	int ret = 0;
-
-	mtx_lock(&offload_db_lock);
-	if (!is_registered(dev))
-		ret = ENODEV;
-	else if (!is_attached(dev))
-		ret = offload_attach(dev);
-	mtx_unlock(&offload_db_lock);
-	return ret;
-}
-
-/**
- * toe_send - send a packet to a TOE device
- * @dev: the device
- * @m: the packet
- *
- * Sends an mbuf to a TOE driver after dealing with any active network taps.
- */
-int
-toe_send(struct toedev *dev, struct mbuf *m)
-{
-	int r;
-
-	critical_enter(); /* XXX neccessary? */
-	r = dev->tod_send(dev, m);
-	critical_exit();
-	if (r)
-		BPF_MTAP(dev->tod_lldev, m);
-	return r;
-}
-
-/**
- * toe_receive_mbuf - process n received TOE packets
- * @dev: the toe device
- * @m: an array of offload packets
- * @n: the number of offload packets
- *
- * Process an array of ingress offload packets.  Each packet is forwarded
- * to any active network taps and then passed to the toe device's receive
- * method.  We optimize passing packets to the receive method by passing
- * it the whole array at once except when there are active taps.
- */
-int
-toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n)
-{
-	if (__predict_true(!bpf_peers_present(dev->tod_lldev->if_bpf)))
-		return dev->tod_recv(dev, m, n);
-
-	for ( ; n; n--, m++) {
-		m[0]->m_pkthdr.rcvif = dev->tod_lldev;
-		BPF_MTAP(dev->tod_lldev, m[0]);
-		dev->tod_recv(dev, m, 1);
-	}
-	return 0;
-}
-
-static inline int
-ifnet_is_offload(const struct ifnet *ifp)
-{
-	return (ifp->if_flags & IFCAP_TOE);
-}
-
-void
-toe_arp_update(struct rtentry *rt)
-{
-	struct ifnet *ifp = rt->rt_ifp;
-
-	if (ifp && ifnet_is_offload(ifp)) {
-		struct toedev *tdev = TOEDEV(ifp);
-
-		if (tdev && tdev->tod_arp_update)
-			tdev->tod_arp_update(tdev, rt);
-	}
-}
-
-/**
- * offload_get_phys_egress - find the physical egress device
- * @root_dev: the root device anchoring the search
- * @so: the socket used to determine egress port in bonding mode
- * @context: in bonding mode, indicates a connection set up or failover
- *
- * Given a root network device it returns the physical egress device that is a
- * descendant of the root device.  The root device may be either a physical
- * device, in which case it is the device returned, or a virtual device, such
- * as a VLAN or bonding device.  In case of a bonding device the search
- * considers the decisions of the bonding device given its mode to locate the
- * correct egress device.
- */
-struct ifnet *
-offload_get_phys_egress(struct ifnet *root_dev, struct socket *so, int context)
-{
-
-#if 0
-	while (root_dev && ifnet_is_offload(root_dev)) {
-		if (root_dev->tod_priv_flags & IFF_802_1Q_VLAN)
-			root_dev = VLAN_DEV_INFO(root_dev)->real_dev;
-		else if (root_dev->tod_flags & IFF_MASTER)
-			root_dev = toe_bond_get_slave(root_dev, sk, context);
-		else
-			break;
-	}
-#endif
-	return root_dev;
-}
-
-static int
-toecore_load(module_t mod, int cmd, void *arg)
-{
-	int err = 0;
-
-	switch (cmd) {
-	case MOD_LOAD:
-		mtx_init(&offload_db_lock, "toedev lock", NULL, MTX_DEF);
-		TAILQ_INIT(&offload_dev_list);
-		TAILQ_INIT(&offload_module_list);
-		break;
-	case MOD_QUIESCE:
-		break;
-	case MOD_UNLOAD:
-		mtx_lock(&offload_db_lock);
-		if (!TAILQ_EMPTY(&offload_dev_list) ||
-		    !TAILQ_EMPTY(&offload_module_list)) {
-			err = EBUSY;
-			mtx_unlock(&offload_db_lock);
-			break;
-		}
-		mtx_unlock(&offload_db_lock);
-		mtx_destroy(&offload_db_lock);
-		break;
-	case MOD_SHUTDOWN:
-		break;
-	default:
-		err = EOPNOTSUPP;
-		break;
-	}
-
-	return (err);
-}
-
-
-static moduledata_t mod_data= {
-	"toecore",
-	toecore_load,
-	0
-};
-
-MODULE_VERSION(toecore, 1);
-DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,35 +1,35 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
@@ -42,22 +42,17 @@
 #include <sys/sockstate.h>
 #include <sys/sockopt.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockbuf.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/protosw.h>
 #include <sys/priv.h>
-
-#if __FreeBSD_version < 800044
-#define V_tcp_do_autosndbuf tcp_do_autosndbuf
-#define V_tcp_autosndbuf_max tcp_autosndbuf_max
-#define V_tcp_do_rfc1323 tcp_do_rfc1323
-#define V_tcp_do_autorcvbuf tcp_do_autorcvbuf
-#define V_tcp_autorcvbuf_max tcp_autorcvbuf_max
-#define V_tcpstat tcpstat
-#endif
+#include <sys/sglist.h>
+#include <sys/taskqueue.h>
 
 #include <net/if.h>
+#include <net/ethernet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -65,37 +60,33 @@
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
 #include <netinet/ip.h>
 #include <netinet/tcp_var.h>
+#define TCPSTATES
 #include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
 #include <netinet/tcp_seq.h>
-#include <netinet/tcp_syncache.h>
 #include <netinet/tcp_timer.h>
 #include <net/route.h>
 
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/bus.h>
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_tcp_offload.h>
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_toepcb.h"
+
+VNET_DECLARE(int, tcp_do_autosndbuf);
+#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
+VNET_DECLARE(int, tcp_autosndbuf_inc);
+#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
+VNET_DECLARE(int, tcp_autosndbuf_max);
+#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
+VNET_DECLARE(int, tcp_do_autorcvbuf);
+#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
+VNET_DECLARE(int, tcp_autorcvbuf_inc);
+#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
+VNET_DECLARE(int, tcp_autorcvbuf_max);
+#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
+extern int always_keepalive;
 
 /*
  * For ULP connections HW may add headers, e.g., for digests, that aren't part
@@ -108,29 +99,6 @@
  */
 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
 
-#ifdef notyet
-/*
- * This sk_buff holds a fake header-only TCP segment that we use whenever we
- * need to exploit SW TCP functionality that expects TCP headers, such as
- * tcp_create_openreq_child().  It's a RO buffer that may be used by multiple
- * CPUs without locking.
- */
-static struct mbuf *tcphdr_mbuf __read_mostly;
-#endif
-
-/*
- * Size of WRs in bytes.  Note that we assume all devices we are handling have
- * the same WR size.
- */
-static unsigned int wrlen __read_mostly;
-
-/*
- * The number of WRs needed for an skb depends on the number of page fragments
- * in the skb and whether it has any payload in its main body.  This maps the
- * length of the gather list represented by an skb into the # of necessary WRs.
- */
-static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
-
 /*
  * Max receive window supported by HW in bytes.  Only a small part of it can
  * be set through option0, the rest needs to be set through RX_DATA_ACK.
@@ -144,1261 +112,760 @@
 #define MIN_RCV_WND (24 * 1024U)
 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
 
-#define VALIDATE_SEQ 0
-#define VALIDATE_SOCK(so)
-#define DEBUG_WR 0
+static void t3_release_offload_resources(struct toepcb *);
+static void send_reset(struct toepcb *toep);
 
-#define TCP_TIMEWAIT	1
-#define TCP_CLOSE	2
-#define TCP_DROP	3
+/*
+ * Called after the last CPL for the toepcb has been received.
+ *
+ * The inp must be wlocked on entry and is unlocked (or maybe destroyed) by the
+ * time this function exits.
+ */
+static int
+toepcb_release(struct toepcb *toep)
+{
+	struct inpcb *inp = toep->tp_inp;
+	struct toedev *tod = toep->tp_tod;
+	struct tom_data *td = t3_tomdata(tod);
+	int rc;
 
-static void t3_send_reset(struct toepcb *toep);
-static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
-static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
-static void handle_syncache_event(int event, void *arg);
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(!(toep->tp_flags & TP_CPL_DONE),
+	    ("%s: double release?", __func__));
 
-static inline void
-SBAPPEND(struct sockbuf *sb, struct mbuf *n)
-{
-	struct mbuf *m;
+	CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid);
 
-	m = sb->sb_mb;
-	while (m) {
-		KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
-		    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
-			!!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
-		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
-			m->m_next, m->m_nextpkt, m->m_flags));
-		m = m->m_next;
-	}
-	m = n;
-	while (m) {
-		KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
-		    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
-			!!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
-		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
-			m->m_next, m->m_nextpkt, m->m_flags));
-		m = m->m_next;
-	}
-	KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
-	sbappendstream_locked(sb, n);
-	m = sb->sb_mb;
+	toep->tp_flags |= TP_CPL_DONE;
+	toep->tp_inp = NULL;
 
-	while (m) {
-		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
-			m->m_next, m->m_nextpkt, m->m_flags));
-		m = m->m_next;
-	}
+	mtx_lock(&td->toep_list_lock);
+	TAILQ_REMOVE(&td->toep_list, toep, link);
+	mtx_unlock(&td->toep_list_lock);
+
+	if (!(toep->tp_flags & TP_ATTACHED))
+		t3_release_offload_resources(toep);
+
+	rc = in_pcbrele_wlocked(inp);
+	if (!rc)
+		INP_WUNLOCK(inp);
+	return (rc);
 }
 
-static inline int
-is_t3a(const struct toedev *dev)
+/*
+ * One sided detach.  The tcpcb is going away and we need to unhook the toepcb
+ * hanging off it.  If the TOE driver is also done with the toepcb we'll release
+ * all offload resources.
+ */
+static void
+toepcb_detach(struct inpcb *inp)
 {
-	return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
+	struct toepcb *toep;
+	struct tcpcb *tp;
+
+	KASSERT(inp, ("%s: inp is NULL", __func__));
+	INP_WLOCK_ASSERT(inp);
+
+	tp = intotcpcb(inp);
+	toep = tp->t_toe;
+
+	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
+	KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__));
+
+	CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__,
+	    tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid,
+	    toep, inp, tp);
+
+	tp->t_toe = NULL;
+	tp->t_flags &= ~TF_TOE;
+	toep->tp_flags &= ~TP_ATTACHED;
+
+	if (toep->tp_flags & TP_CPL_DONE)
+		t3_release_offload_resources(toep);
+}
+
+void
+t3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
+{
+
+	toepcb_detach(tp->t_inpcb);
+}
+
+static int
+alloc_atid(struct tid_info *t, void *ctx)
+{
+	int atid = -1;
+
+	mtx_lock(&t->atid_lock);
+	if (t->afree) {
+		union active_open_entry *p = t->afree;
+
+		atid = (p - t->atid_tab) + t->atid_base;
+		t->afree = p->next;
+		p->ctx = ctx;
+		t->atids_in_use++;
+	}
+	mtx_unlock(&t->atid_lock);
+
+	return (atid);
 }
 
 static void
-dump_toepcb(struct toepcb *toep)
+free_atid(struct tid_info *t, int atid)
 {
-	DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
-	    toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
-	    toep->tp_mtu_idx, toep->tp_tid);
+	union active_open_entry *p = atid2entry(t, atid);
 
-	DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
-	    toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 
-	    toep->tp_mss_clamp, toep->tp_flags);
+	mtx_lock(&t->atid_lock);
+	p->next = t->afree;
+	t->afree = p;
+	t->atids_in_use--;
+	mtx_unlock(&t->atid_lock);
 }
 
-#ifndef RTALLOC2_DEFINED
-static struct rtentry *
-rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
+void
+insert_tid(struct tom_data *td, void *ctx, unsigned int tid)
 {
-	struct rtentry *rt = NULL;
-	
-	if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
-		RT_UNLOCK(rt);
+	struct tid_info *t = &td->tid_maps;
 
-	return (rt);
-}
-#endif
-
-/*
- * Determine whether to send a CPL message now or defer it.  A message is
- * deferred if the connection is in SYN_SENT since we don't know the TID yet.
- * For connections in other states the message is sent immediately.
- * If through_l2t is set the message is subject to ARP processing, otherwise
- * it is sent directly.
- */
-static inline void
-send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
-{
-	struct tcpcb *tp = toep->tp_tp;
-
-	if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
-		inp_wlock(tp->t_inpcb);
-		mbufq_tail(&toep->out_of_order_queue, m);  // defer
-		inp_wunlock(tp->t_inpcb);
-	} else if (through_l2t)
-		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);  // send through L2T
-	else
-		cxgb_ofld_send(TOEP_T3C_DEV(toep), m);          // send directly
+	t->tid_tab[tid] = ctx;
+	atomic_add_int(&t->tids_in_use, 1);
 }
 
-static inline unsigned int
-mkprio(unsigned int cntrl, const struct toepcb *toep)
+void
+update_tid(struct tom_data *td, void *ctx, unsigned int tid)
 {
-        return (cntrl);
+	struct tid_info *t = &td->tid_maps;
+
+	t->tid_tab[tid] = ctx;
+}
+
+void
+remove_tid(struct tom_data *td, unsigned int tid)
+{
+	struct tid_info *t = &td->tid_maps;
+
+	t->tid_tab[tid] = NULL;
+	atomic_add_int(&t->tids_in_use, -1);
+}
+
+/* use ctx as a next pointer in the tid release list */
+void
+queue_tid_release(struct toedev *tod, unsigned int tid)
+{
+	struct tom_data *td = t3_tomdata(tod);
+	void **p = &td->tid_maps.tid_tab[tid];
+	struct adapter *sc = tod->tod_softc;
+
+	mtx_lock(&td->tid_release_lock);
+	*p = td->tid_release_list;
+	td->tid_release_list = p;
+	if (!*p)
+		taskqueue_enqueue(sc->tq, &td->tid_release_task);
+	mtx_unlock(&td->tid_release_lock);
 }
 
 /*
- * Populate a TID_RELEASE WR.  The skb must be already propely sized.
+ * Populate a TID_RELEASE WR.
  */
 static inline void
-mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
+mk_tid_release(struct cpl_tid_release *cpl, unsigned int tid)
 {
-	struct cpl_tid_release *req;
 
-	m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	req = mtod(m, struct cpl_tid_release *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+	cpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+}
+
+void
+release_tid(struct toedev *tod, unsigned int tid, int qset)
+{
+	struct tom_data *td = t3_tomdata(tod);
+	struct adapter *sc = tod->tod_softc;
+	struct mbuf *m;
+	struct cpl_tid_release *cpl;
+#ifdef INVARIANTS
+	struct tid_info *t = &td->tid_maps;
+#endif
+
+	KASSERT(tid >= 0 && tid < t->ntids,
+	    ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids));
+
+	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
+	if (m) {
+		mk_tid_release(cpl, tid);
+		t3_offload_tx(sc, m);
+		remove_tid(td, tid);
+	} else
+		queue_tid_release(tod, tid);
+
+}
+
+void
+t3_process_tid_release_list(void *data, int pending)
+{
+	struct mbuf *m;
+	struct tom_data *td = data;
+	struct adapter *sc = td->tod.tod_softc;
+
+	mtx_lock(&td->tid_release_lock);
+	while (td->tid_release_list) {
+		void **p = td->tid_release_list;
+		unsigned int tid = p - td->tid_maps.tid_tab;
+		struct cpl_tid_release *cpl;
+
+		td->tid_release_list = (void **)*p;
+		m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */
+		if (m == NULL)
+			break;	/* XXX: who reschedules the release task? */
+		mtx_unlock(&td->tid_release_lock);
+		mk_tid_release(cpl, tid);
+		t3_offload_tx(sc, m);
+		remove_tid(td, tid);
+		mtx_lock(&td->tid_release_lock);
+	}
+	mtx_unlock(&td->tid_release_lock);
+}
+
+static void
+close_conn(struct adapter *sc, struct toepcb *toep)
+{
+	struct mbuf *m;
+	struct cpl_close_con_req *req;
+
+	if (toep->tp_flags & TP_FIN_SENT)
+		return;
+
+	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
+	if (m == NULL)
+		CXGB_UNIMPLEMENTED();
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid));
+	req->rsvd = 0;
+
+	toep->tp_flags |= TP_FIN_SENT;
+	t3_offload_tx(sc, m);
 }
 
 static inline void
-make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
+make_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len,
+    struct mbuf *tail)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
-	struct tx_data_wr *req;
 	struct sockbuf *snd;
-	
+
 	inp_lock_assert(tp->t_inpcb);
 	snd = so_sockbuf_snd(so);
-	
-	req = mtod(m, struct tx_data_wr *);
-	m->m_len = sizeof(*req);
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
-	req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
 	/* len includes the length of any HW ULP additions */
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
 	/* V_TX_ULP_SUBMODE sets both the mode and submode */
-	req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
-	                   V_TX_URG(/* skb_urgent(skb) */ 0 ) |
-	                   V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
-				   (tail ? 0 : 1))));
+	req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) |
+	    V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1)));
 	req->sndseq = htonl(tp->snd_nxt);
 	if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
-		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 
-				    V_TX_CPU_IDX(toep->tp_qset));
- 
-		/* Sendbuffer is in units of 32KB.
-		 */
+		struct adapter *sc = toep->tp_tod->tod_softc;
+		int cpu_idx = sc->rrss_map[toep->tp_qset];
+
+		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+		    V_TX_CPU_IDX(cpu_idx));
+
+		/* Sendbuffer is in units of 32KB. */
 		if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) 
-			req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
-		else {
+			req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15));
+		else
 			req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
-		}
-		
+
 		toep->tp_flags |= TP_DATASENT;
 	}
 }
 
-#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
+/*
+ * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc.
+ * TOM_XXX_MOVE to some common header file.
+ */
+/*
+ * IMM_LEN: # of bytes that can be tx'd as immediate data.  There are 16 flits
+ * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more
+ * for the second gen bit flit.  This leaves us with 12 flits.
+ *
+ * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs.
+ * The first desc has a tx_data_wr (which includes the WR header), the rest have
+ * the WR header only.  All descs have the second gen bit flit.
+ *
+ * sgllen_to_descs: # of tx descs used up by an sgl of given length.  The first
+ * desc has a tx_data_wr (which includes the WR header), the rest have the WR
+ * header only.  All descs have the second gen bit flit.
+ *
+ * flits_to_sgllen: # of SGL entries that can be fit in the given # of flits.
+ *
+ */
+#define IMM_LEN 96
+static int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35};
+static int sgllen_to_descs[TX_MAX_SEGS] = {
+	0, 1, 1, 1, 1, 1, 1, 1, 1, 2,	/*  0 -  9 */
+	2, 2, 2, 2, 2, 2, 2, 2, 3, 3,	/* 10 - 19 */
+	3, 3, 3, 3, 3, 3, 3, 4, 4, 4,	/* 20 - 29 */
+	4, 4, 4, 4, 4, 4		/* 30 - 35 */
+};
+#if 0
+static int flits_to_sgllen[TX_DESC_FLITS + 1] = {
+	0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10
+};
+#endif
+#if SGE_NUM_GENBITS != 2
+#error "SGE_NUM_GENBITS really must be 2"
+#endif
 
 int
 t3_push_frames(struct socket *so, int req_completion)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
-	
-	struct mbuf *tail, *m0, *last;
-	struct t3cdev *cdev;
-	struct tom_data *d;
-	int state, bytes, count, total_bytes;
-	bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
+	struct mbuf *m0, *sndptr, *m;
+	struct toedev *tod = toep->tp_tod;
+	struct adapter *sc = tod->tod_softc;
+	int bytes, ndesc, total_bytes = 0, mlen;
 	struct sockbuf *snd;
-	
-	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
-		DPRINTF("tcp state=%d\n", tp->t_state);	
-		return (0);
-	}	
-
-	state = so_state_get(so);
-	
-	if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
-		DPRINTF("disconnecting\n");
-		
-		return (0);
-	}
+	struct sglist *sgl;
+	struct ofld_hdr *oh;
+	caddr_t dst;
+	struct tx_data_wr *wr;
 
 	inp_lock_assert(tp->t_inpcb);
 
 	snd = so_sockbuf_snd(so);
-	sockbuf_lock(snd);
+	SOCKBUF_LOCK(snd);
 
-	d = TOM_DATA(toep->tp_toedev);
-	cdev = d->cdev;
-
-	last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
-
-	total_bytes = 0;
-	DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
-	    toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
-
-	if (last && toep->tp_m_last == last  && snd->sb_sndptroff != 0) {
-		KASSERT(tail, ("sbdrop error"));
-		last = tail = tail->m_next;
+	/*
+	 * Autosize the send buffer.
+	 */
+	if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) {
+		if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) &&
+		    snd->sb_cc < VNET(tcp_autosndbuf_max)) {
+			if (!sbreserve_locked(snd, min(snd->sb_hiwat +
+			    VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)),
+			    so, curthread))
+				snd->sb_flags &= ~SB_AUTOSIZE;
+		}
 	}
 
-	if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
-		DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
-		sockbuf_unlock(snd);
+	if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr)
+		sndptr = toep->tp_m_last->m_next;
+	else
+		sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
 
-		return (0);		
-	}
-			
-	toep->tp_m_last = NULL;
-	while (toep->tp_wr_avail && (tail != NULL)) {
-		count = bytes = 0;
-		segp = segs;
-		if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
-			sockbuf_unlock(snd);
-			return (0);
+	/* Nothing to send or no WRs available for sending data */
+	if (toep->tp_wr_avail == 0 || sndptr == NULL)
+		goto out;
+
+	/* Something to send and at least 1 WR available */
+	while (toep->tp_wr_avail && sndptr != NULL) {
+
+		m0 = m_gethdr(M_NOWAIT, MT_DATA);
+		if (m0 == NULL)
+			break;
+		oh = mtod(m0, struct ofld_hdr *);
+		wr = (void *)(oh + 1);
+		dst = (void *)(wr + 1);
+
+		m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr);
+		oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF |
+		    V_HDR_QSET(toep->tp_qset);
+
+		/*
+		 * Try to construct an immediate data WR if possible.  Stuff as
+		 * much data into it as possible, one whole mbuf at a time.
+		 */
+		mlen = sndptr->m_len;
+		ndesc = bytes = 0;
+		while (mlen <= IMM_LEN - bytes) {
+			bcopy(sndptr->m_data, dst, mlen);
+			bytes += mlen;
+			dst += mlen;
+
+			if (!(sndptr = sndptr->m_next))
+				break;
+			mlen = sndptr->m_len;
 		}
-		/*
-		 * If the data in tail fits as in-line, then
-		 * make an immediate data wr.
-		 */
-		if (tail->m_len <= IMM_LEN) {
-			count = 1;
-			bytes = tail->m_len;
-			last = tail;
-			tail = tail->m_next;
-			m_set_sgl(m0, NULL);
-			m_set_sgllen(m0, 0);
-			make_tx_data_wr(so, m0, bytes, tail);
-			m_append(m0, bytes, mtod(last, caddr_t));
-			KASSERT(!m0->m_next, ("bad append"));
+
+		if (bytes) {
+
+			/* Was able to fit 'bytes' bytes in an immediate WR */
+
+			ndesc = 1;
+			make_tx_data_wr(so, wr, bytes, sndptr);
+
+			m0->m_len += bytes;
+			m0->m_pkthdr.len = m0->m_len;
+
 		} else {
-			while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
-			    && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
-				bytes += tail->m_len;
-				last = tail;
-				count++;
-				/*
-				 * technically an abuse to be using this for a VA
-				 * but less gross than defining my own structure
-				 * or calling pmap_kextract from here :-|
-				 */
-				segp->ds_addr = (bus_addr_t)tail->m_data;
-				segp->ds_len = tail->m_len;
-				DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
-				    count, mbuf_wrs[count], tail->m_data, tail->m_len);
-				segp++;
-				tail = tail->m_next;
+			int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC);
+
+			/* Need to make an SGL */
+
+			sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT);
+			if (sgl == NULL)
+				break;
+
+			for (m = sndptr; m != NULL; m = m->m_next) {
+				if ((mlen = m->m_len) > 0) {
+					if (sglist_append(sgl, m->m_data, mlen))
+					    break;
+				}
+				bytes += mlen;
 			}
-			DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
-			    toep->tp_wr_avail, count, mbuf_wrs[count], tail);	
+			sndptr = m;
+			if (bytes == 0) {
+				sglist_free(sgl);
+				break;
+			}
+			ndesc = sgllen_to_descs[sgl->sg_nseg];
+			oh->flags |= F_HDR_SGL;
+			oh->sgl = sgl;
+			make_tx_data_wr(so, wr, bytes, sndptr);
+		}
 
-			m_set_sgl(m0, segs);
-			m_set_sgllen(m0, count);
-			make_tx_data_wr(so, m0, bytes, tail);
-		}
-		m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
+		oh->flags |= V_HDR_NDESC(ndesc);
+		oh->plen = bytes;
 
-		if (tail) {
-			snd->sb_sndptr = tail;
+		snd->sb_sndptr = sndptr;
+		snd->sb_sndptroff += bytes;
+		if (sndptr == NULL) {
+			snd->sb_sndptr = snd->sb_mbtail;
+			snd->sb_sndptroff -= snd->sb_mbtail->m_len;
+			toep->tp_m_last = snd->sb_mbtail;
+		} else
 			toep->tp_m_last = NULL;
-		} else 
-			toep->tp_m_last = snd->sb_sndptr = last;
 
+		total_bytes += bytes;
 
-		DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
+		toep->tp_wr_avail -= ndesc;
+		toep->tp_wr_unacked += ndesc;
 
-		snd->sb_sndptroff += bytes;
-		total_bytes += bytes;
-		toep->tp_write_seq += bytes;
-		CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
-		    " tail=%p sndptr=%p sndptroff=%d",
-		    toep->tp_wr_avail, count, mbuf_wrs[count],
-		    tail, snd->sb_sndptr, snd->sb_sndptroff);	
-		if (tail)
-			CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
-			    " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
-			    total_bytes, toep->tp_m_last, tail->m_data,
-			    tp->snd_una);
-		else
-			CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
-			    " tp_m_last=%p snd_una=0x%08x",
-			    total_bytes, toep->tp_m_last, tp->snd_una);
-
-
-#ifdef KTR		
-{
-		int i;
-
-		i = 0;
-		while (i < count && m_get_sgllen(m0)) {
-			if ((count - i) >= 3) {
-				CTR6(KTR_TOM,
-				    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
-				    " len=%d pa=0x%zx len=%d",
-				    segs[i].ds_addr, segs[i].ds_len,
-				    segs[i + 1].ds_addr, segs[i + 1].ds_len,
-				    segs[i + 2].ds_addr, segs[i + 2].ds_len);
-				    i += 3;
-			} else if ((count - i) == 2) {
-				CTR4(KTR_TOM, 
-				    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
-				    " len=%d",
-				    segs[i].ds_addr, segs[i].ds_len,
-				    segs[i + 1].ds_addr, segs[i + 1].ds_len);
-				    i += 2;
-			} else {
-				CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
-				    segs[i].ds_addr, segs[i].ds_len);
-				i++;
-			}
-	
-		}
-}
-#endif		
-                 /*
-		 * remember credits used
-		 */
-		m0->m_pkthdr.csum_data = mbuf_wrs[count];
-		m0->m_pkthdr.len = bytes;
-		toep->tp_wr_avail -= mbuf_wrs[count];
-		toep->tp_wr_unacked += mbuf_wrs[count];
-		
-		if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
+		if ((req_completion && toep->tp_wr_unacked == ndesc) ||
 		    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
-			struct work_request_hdr *wr = cplhdr(m0);
-
-			wr->wr_hi |= htonl(F_WR_COMPL);
+			wr->wr.wrh_hi |= htonl(F_WR_COMPL);
 			toep->tp_wr_unacked = 0;	
 		}
-		KASSERT((m0->m_pkthdr.csum_data > 0) &&
-		    (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
-			m0->m_pkthdr.csum_data));
-		m0->m_type = MT_DONTFREE;
+
 		enqueue_wr(toep, m0);
-		DPRINTF("sending offload tx with %d bytes in %d segments\n",
-		    bytes, count);
-		l2t_send(cdev, m0, toep->tp_l2t);
+		l2t_send(sc, m0, toep->tp_l2t);
 	}
-	sockbuf_unlock(snd);
+out:
+	SOCKBUF_UNLOCK(snd);
+
+	if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN))
+		close_conn(sc, toep);
+
 	return (total_bytes);
 }
 
-/*
- * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
- * under any circumstances.  We take the easy way out and always queue the
- * message to the write_queue.  We can optimize the case where the queue is
- * already empty though the optimization is probably not worth it.
- */
-static void
-close_conn(struct socket *so)
-{
-	struct mbuf *m;
-	struct cpl_close_con_req *req;
-	struct tom_data *d;
-	struct inpcb *inp = so_sotoinpcb(so);
-	struct tcpcb *tp;
-	struct toepcb *toep;
-	unsigned int tid; 
-
-
-	inp_wlock(inp);
-	tp = so_sototcpcb(so);
-	toep = tp->t_toe;
-	
-	if (tp->t_state != TCPS_SYN_SENT)
-		t3_push_frames(so, 1);
-	
-	if (toep->tp_flags & TP_FIN_SENT) {
-		inp_wunlock(inp);
-		return;
-	}
-
-	tid = toep->tp_tid;
-	    
-	d = TOM_DATA(toep->tp_toedev);
-	
-	m = m_gethdr_nofail(sizeof(*req));
-	m_set_priority(m, CPL_PRIORITY_DATA);
-	m_set_sgl(m, NULL);
-	m_set_sgllen(m, 0);
-
-	toep->tp_flags |= TP_FIN_SENT;
-	req = mtod(m, struct cpl_close_con_req *);
-	
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
-	req->wr.wr_lo = htonl(V_WR_TID(tid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
-	req->rsvd = 0;
-	inp_wunlock(inp);
-	/*
-	 * XXX - need to defer shutdown while there is still data in the queue
-	 *
-	 */
-	CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
-	cxgb_ofld_send(d->cdev, m);
-
-}
-
-/*
- * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
- * and send it along.
- */
-static void
-abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
-{
-	struct cpl_abort_req *req = cplhdr(m);
-
-	req->cmd = CPL_ABORT_NO_RST;
-	cxgb_ofld_send(cdev, m);
-}
-
-/*
- * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
- * permitted to return without sending the message in case we cannot allocate
- * an sk_buff.  Returns the number of credits sent.
- */
-uint32_t
-t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
+static int
+send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	struct mbuf *m;
 	struct cpl_rx_data_ack *req;
-	struct toepcb *toep = tp->t_toe;
-	struct toedev *tdev = toep->tp_toedev;
-	
-	m = m_gethdr_nofail(sizeof(*req));
+	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 
-	DPRINTF("returning %u credits to HW\n", credits);
-	
-	req = mtod(m, struct cpl_rx_data_ack *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
+	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, req);
+	if (m == NULL)
+		return (0);
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	req->wr.wrh_lo = 0;
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
 	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
-	m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 
-	cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
+	t3_offload_tx(sc, m);
 	return (credits);
 }
 
-/*
- * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
- * This is only used in DDP mode, so we take the opportunity to also set the
- * DACK mode and flush any Rx credits.
- */
 void
-t3_send_rx_modulate(struct toepcb *toep)
+t3_rcvd(struct toedev *tod, struct tcpcb *tp)
 {
-	struct mbuf *m;
-	struct cpl_rx_data_ack *req;
+	struct adapter *sc = tod->tod_softc;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	struct sockbuf *so_rcv = &so->so_rcv;
+	struct toepcb *toep = tp->t_toe;
+	int must_send;
 
-	m = m_gethdr_nofail(sizeof(*req));
+	INP_WLOCK_ASSERT(inp);
 
-	req = mtod(m, struct cpl_rx_data_ack *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
-	req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
-				 V_RX_DACK_MODE(1) |
-				 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-	toep->tp_rcv_wup = toep->tp_copied_seq;
+	SOCKBUF_LOCK(so_rcv);
+	KASSERT(toep->tp_enqueued >= so_rcv->sb_cc,
+	    ("%s: so_rcv->sb_cc > enqueued", __func__));
+	toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc;
+	toep->tp_enqueued = so_rcv->sb_cc;
+	SOCKBUF_UNLOCK(so_rcv);
+
+	must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd;
+	if (must_send || toep->tp_rx_credits >= 15 * 1024) {
+		int credits;
+
+		credits = send_rx_credits(sc, toep, toep->tp_rx_credits);
+		toep->tp_rx_credits -= credits;
+		tp->rcv_wnd += credits;
+		tp->rcv_adv += credits;
+	}
 }
 
-/*
- * Handle receipt of an urgent pointer.
- */
-static void
-handle_urg_ptr(struct socket *so, uint32_t urg_seq)
+static int
+do_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-#ifdef URGENT_DATA_SUPPORTED
-	struct tcpcb *tp = so_sototcpcb(so);
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_rx_urg_notify *hdr = mtod(m, void *);
+	unsigned int tid = GET_TID(hdr);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 
-	urg_seq--;   /* initially points past the urgent data, per BSD */
-
-	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
-		return;                                 /* duplicate pointer */
-	sk_send_sigurg(sk);
-	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
-	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
-		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
-
-		tp->copied_seq++;
-		if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
-			tom_eat_skb(sk, skb, 0);
-	}
-	tp->urg_data = TCP_URG_NOTYET;
-	tp->urg_seq = urg_seq;
-#endif
-}
-
-/*
- * Returns true if a socket cannot accept new Rx data.
- */
-static inline int
-so_no_receive(const struct socket *so)
-{
-	return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
-}
-
-/*
- * Process an urgent data notification.
- */
-static void
-rx_urg_notify(struct toepcb *toep, struct mbuf *m)
-{
-	struct cpl_rx_urg_notify *hdr = cplhdr(m);
-	struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
-
-	VALIDATE_SOCK(so);
-
-	if (!so_no_receive(so))
-		handle_urg_ptr(so, ntohl(hdr->seq));
+	log(LOG_ERR, "%s: tid %u inp %p", __func__, tid, toep->tp_inp);
 
 	m_freem(m);
-}
-
-/*
- * Handler for RX_URG_NOTIFY CPL messages.
- */
-static int
-do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct toepcb *toep = (struct toepcb *)ctx;
-
-	rx_urg_notify(toep, m);
 	return (0);
 }
 
-static __inline int
-is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
-{
-	return (toep->tp_ulp_mode ||
-		(toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
-		    dev->tod_ttid >= TOE_ID_CHELSIO_T3));
-}
-
-/*
- * Set of states for which we should return RX credits.
- */
-#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
-
-/*
- * Called after some received data has been read.  It returns RX credits
- * to the HW for the amount of data processed.
- */
-void
-t3_cleanup_rbuf(struct tcpcb *tp, int copied)
+int
+t3_send_fin(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
-	struct socket *so;
-	struct toedev *dev;
-	int dack_mode, must_send, read;
-	u32 thres, credits, dack = 0;
-	struct sockbuf *rcv;
-	
-	so = inp_inpcbtosocket(tp->t_inpcb);
-	rcv = so_sockbuf_rcv(so);
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp_inpcbtosocket(inp);
+#if defined(KTR)
+	unsigned int tid = toep->tp_tid;
+#endif
 
-	if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
-		(tp->t_state == TCPS_FIN_WAIT_2))) {
-		if (copied) {
-			sockbuf_lock(rcv);
-			toep->tp_copied_seq += copied;
-			sockbuf_unlock(rcv);
-		}
-		
-		return;
-	}
-	
-	inp_lock_assert(tp->t_inpcb); 
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
 
-	sockbuf_lock(rcv);
-	if (copied)
-		toep->tp_copied_seq += copied;
-	else {
-		read = toep->tp_enqueued_bytes - rcv->sb_cc;
-		toep->tp_copied_seq += read;
-	}
-	credits = toep->tp_copied_seq - toep->tp_rcv_wup;
-	toep->tp_enqueued_bytes = rcv->sb_cc;
-	sockbuf_unlock(rcv);
+	CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
+	    toep->tp_flags);
 
-	if (credits > rcv->sb_mbmax) {
-		log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
-		    toep->tp_copied_seq, toep->tp_rcv_wup, credits);
-	    credits = rcv->sb_mbmax;
-	}
-	
-	    
-	/*
-	 * XXX this won't accurately reflect credit return - we need
-	 * to look at the difference between the amount that has been 
-	 * put in the recv sockbuf and what is there now
-	 */
+	toep->tp_flags |= TP_SEND_FIN;
+	t3_push_frames(so, 1);
 
-	if (__predict_false(!credits))
-		return;
-
-	dev = toep->tp_toedev;
-	thres = TOM_TUNABLE(dev, rx_credit_thres);
-
-	if (__predict_false(thres == 0))
-		return;
-
-	if (is_delack_mode_valid(dev, toep)) {
-		dack_mode = TOM_TUNABLE(dev, delack);
-		if (__predict_false(dack_mode != toep->tp_delack_mode)) {
-			u32 r = tp->rcv_nxt - toep->tp_delack_seq;
-
-			if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
-				dack = F_RX_DACK_CHANGE |
-				       V_RX_DACK_MODE(dack_mode);
-		}
-	} else 
-		dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
-		
-	/*
-	 * For coalescing to work effectively ensure the receive window has
-	 * at least 16KB left.
-	 */
-	must_send = credits + 16384 >= tp->rcv_wnd;
-
-	if (must_send || credits >= thres)
-		toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
-}
-
-static int
-cxgb_toe_disconnect(struct tcpcb *tp)
-{
-	struct socket *so;
-	
-	DPRINTF("cxgb_toe_disconnect\n");
-
-	so = inp_inpcbtosocket(tp->t_inpcb);
-	close_conn(so);
 	return (0);
 }
 
-static int
-cxgb_toe_reset(struct tcpcb *tp)
+int
+t3_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
-	struct toepcb *toep = tp->t_toe;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
 
-	t3_send_reset(toep);
-
-	/*
-	 * unhook from socket
-	 */
-	tp->t_flags &= ~TF_TOE;
-	toep->tp_tp = NULL;
-	tp->t_toe = NULL;
-	return (0);
-}
-
-static int
-cxgb_toe_send(struct tcpcb *tp)
-{
-	struct socket *so;
-	
-	DPRINTF("cxgb_toe_send\n");
-	dump_toepcb(tp->t_toe);
-
-	so = inp_inpcbtosocket(tp->t_inpcb);
 	t3_push_frames(so, 1);
 	return (0);
 }
 
-static int
-cxgb_toe_rcvd(struct tcpcb *tp)
+/* What mtu_idx to use, given a 4-tuple and/or an MSS cap */
+int
+find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
 {
+	unsigned short *mtus = &sc->params.mtus[0];
+	int i = 0, mss;
 
-	inp_lock_assert(tp->t_inpcb);
+	KASSERT(inc != NULL || pmss > 0,
+	    ("%s: at least one of inc/pmss must be specified", __func__));
 
-	t3_cleanup_rbuf(tp, 0);
-	
-	return (0);
+	mss = inc ? tcp_mssopt(inc) : pmss;
+	if (pmss > 0 && mss > pmss)
+		mss = pmss;
+
+	while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
+		++i;
+
+	return (i);
 }
 
-static void
-cxgb_toe_detach(struct tcpcb *tp)
-{
-	struct toepcb *toep;
-
-        /*
-	 * XXX how do we handle teardown in the SYN_SENT state?
-	 *
-	 */
-	inp_lock_assert(tp->t_inpcb);
-	toep = tp->t_toe;
-	toep->tp_tp = NULL;
-
-	/*
-	 * unhook from socket
-	 */
-	tp->t_flags &= ~TF_TOE;
-	tp->t_toe = NULL;
-}
-	
-
-static struct toe_usrreqs cxgb_toe_usrreqs = {
-	.tu_disconnect = cxgb_toe_disconnect,
-	.tu_reset = cxgb_toe_reset,
-	.tu_send = cxgb_toe_send,
-	.tu_rcvd = cxgb_toe_rcvd,
-	.tu_detach = cxgb_toe_detach,
-	.tu_detach = cxgb_toe_detach,
-	.tu_syncache_event = handle_syncache_event,
-};
-
-
-static void
-__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
-			    uint64_t mask, uint64_t val, int no_reply)
-{
-	struct cpl_set_tcb_field *req;
-
-	CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
-	    toep->tp_tid, word, mask, val);
-
-	req = mtod(m, struct cpl_set_tcb_field *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
-	req->reply = V_NO_REPLY(no_reply);
-	req->cpu_idx = 0;
-	req->word = htons(word);
-	req->mask = htobe64(mask);
-	req->val = htobe64(val);
-
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	send_or_defer(toep, m, 0);
-}
-
-static void
-t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
+static inline void
+purge_wr_queue(struct toepcb *toep)
 {
 	struct mbuf *m;
-	struct tcpcb *tp = toep->tp_tp;
-	
-	if (toep == NULL)
-		return;
- 
-	if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
-		printf("not seting field\n");
-		return;
+	struct ofld_hdr *oh;
+
+	while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) {
+		oh = mtod(m, struct ofld_hdr *);
+		if (oh->flags & F_HDR_SGL)
+			sglist_free(oh->sgl);
+		m_freem(m);
 	}
-	
-	m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
-
-	__set_tcb_field(toep, m, word, mask, val, 1);
 }
 
 /*
- * Set one of the t_flags bits in the TCB.
- */
-static void
-set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
-{
-
-	t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
-}
-
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
- */
-static void
-t3_set_nagle(struct toepcb *toep)
-{
-	struct tcpcb *tp = toep->tp_tp;
-	
-	set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
-}
-
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
- */
-void
-t3_set_keepalive(struct toepcb *toep, int on_off)
-{
-
-	set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
-}
-
-void
-t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
-{
-	set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
-}
-
-void
-t3_set_dack_mss(struct toepcb *toep, int on_off)
-{
-
-	set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
-}
-
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
- */
-static void
-t3_set_tos(struct toepcb *toep)
-{
-	int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);	
-	
-	t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
-			 V_TCB_TOS(tos));
-}
-
-
-/*
- * In DDP mode, TP fails to schedule a timer to push RX data to the host when
- * DDP is disabled (data is delivered to freelist). [Note that, the peer should
- * set the PSH bit in the last segment, which would trigger delivery.]
- * We work around the issue by setting a DDP buffer in a partial placed state,
- * which guarantees that TP will schedule a timer.
- */
-#define TP_DDP_TIMER_WORKAROUND_MASK\
-    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
-     ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
-       V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
-#define TP_DDP_TIMER_WORKAROUND_VAL\
-    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
-     ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
-      32))
-
-static void
-t3_enable_ddp(struct toepcb *toep, int on)
-{
-	if (on) {
-		
-		t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
-				 V_TF_DDP_OFF(0));
-	} else
-		t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
-				 V_TF_DDP_OFF(1) |
-				 TP_DDP_TIMER_WORKAROUND_MASK,
-				 V_TF_DDP_OFF(1) |
-				 TP_DDP_TIMER_WORKAROUND_VAL);
-
-}
-
-void
-t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
-{
-	t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
-			 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
-			 tag_color);
-}
-
-void
-t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
-		    unsigned int len)
-{
-	if (buf_idx == 0)
-		t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
-			 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
-			 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
-			 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
-			 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
-	else
-		t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
-			 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
-			 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
-			 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
-			 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
-}
-
-static int
-t3_set_cong_control(struct socket *so, const char *name)
-{
-#ifdef CONGESTION_CONTROL_SUPPORTED	
-	int cong_algo;
-
-	for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
-		if (!strcmp(name, t3_cong_ops[cong_algo].name))
-			break;
-
-	if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
-		return -EINVAL;
-#endif
-	return 0;
-}
-
-int
-t3_get_tcb(struct toepcb *toep)
-{
-	struct cpl_get_tcb *req;
-	struct tcpcb *tp = toep->tp_tp;
-	struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
-
-	if (!m)
-		return (ENOMEM);
-	
-	inp_lock_assert(tp->t_inpcb);	
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	req = mtod(m, struct cpl_get_tcb *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
-	req->cpuno = htons(toep->tp_qset);
-	req->rsvd = 0;
-	if (tp->t_state == TCPS_SYN_SENT)
-		mbufq_tail(&toep->out_of_order_queue, m);	// defer
-	else
-		cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-	return 0;
-}
-
-static inline void
-so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
-{
-
-	toepcb_hold(toep);
-
-	cxgb_insert_tid(d->cdev, d->client, toep, tid);
-}
-
-/**
- *	find_best_mtu - find the entry in the MTU table closest to an MTU
- *	@d: TOM state
- *	@mtu: the target MTU
- *
- *	Returns the index of the value in the MTU table that is closest to but
- *	does not exceed the target MTU.
- */
-static unsigned int
-find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
-	int i = 0;
-
-	while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
-		++i;
-	return (i);
-}
-
-static unsigned int
-select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
-{
-	unsigned int idx;
-	
-#ifdef notyet
-	struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
-#endif
-	if (tp) {
-		tp->t_maxseg = pmtu - 40;
-		if (tp->t_maxseg < td->mtus[0] - 40)
-			tp->t_maxseg = td->mtus[0] - 40;
-		idx = find_best_mtu(td, tp->t_maxseg + 40);
-
-		tp->t_maxseg = td->mtus[idx] - 40;
-	} else
-		idx = find_best_mtu(td, pmtu);
-	
-	return (idx);
-}
-
-static inline void
-free_atid(struct t3cdev *cdev, unsigned int tid)
-{
-	struct toepcb *toep = cxgb_free_atid(cdev, tid);
-
-	if (toep)
-		toepcb_release(toep);
-}
-
-/*
- * Release resources held by an offload connection (TID, L2T entry, etc.)
+ * Release cxgb(4) and T3 resources held by an offload connection (TID, L2T
+ * entry, etc.)
  */
 static void
 t3_release_offload_resources(struct toepcb *toep)
 {
-	struct tcpcb *tp = toep->tp_tp;
-	struct toedev *tdev = toep->tp_toedev;
-	struct t3cdev *cdev;
-	struct socket *so;
-	unsigned int tid = toep->tp_tid;
-	struct sockbuf *rcv;
-	
-	CTR0(KTR_TOM, "t3_release_offload_resources");
+	struct toedev *tod = toep->tp_tod;
+	struct tom_data *td = t3_tomdata(tod);
 
-	if (!tdev)
-		return;
-
-	cdev = TOEP_T3C_DEV(toep);
-	if (!cdev)
-		return;
-
-	toep->tp_qset = 0;
-	t3_release_ddp_resources(toep);
-
-#ifdef CTRL_SKB_CACHE
-	kfree_skb(CTRL_SKB_CACHE(tp));
-	CTRL_SKB_CACHE(tp) = NULL;
-#endif
-
-	if (toep->tp_wr_avail != toep->tp_wr_max) {
-		purge_wr_queue(toep);
-		reset_wr_list(toep);
+	/*
+	 * The TOM explicitly detaches its toepcb from the system's inp before
+	 * it releases the offload resources.
+	 */
+	if (toep->tp_inp) {
+		panic("%s: inp %p still attached to toepcb %p",
+		    __func__, toep->tp_inp, toep);
 	}
 
+	if (toep->tp_wr_avail != toep->tp_wr_max)
+		purge_wr_queue(toep);
+
 	if (toep->tp_l2t) {
-		l2t_release(L2DATA(cdev), toep->tp_l2t);
+		l2t_release(td->l2t, toep->tp_l2t);
 		toep->tp_l2t = NULL;
 	}
-	toep->tp_tp = NULL;
-	if (tp) {
-		inp_lock_assert(tp->t_inpcb);
-		so = inp_inpcbtosocket(tp->t_inpcb);
-		rcv = so_sockbuf_rcv(so);		
-		/*
-		 * cancel any offloaded reads
-		 *
-		 */
-		sockbuf_lock(rcv);
-		tp->t_toe = NULL;
-		tp->t_flags &= ~TF_TOE;
-		if (toep->tp_ddp_state.user_ddp_pending) {
-			t3_cancel_ubuf(toep, rcv);
-			toep->tp_ddp_state.user_ddp_pending = 0;
-		}
-		so_sorwakeup_locked(so);
-			
-	}
-	
-	if (toep->tp_state == TCPS_SYN_SENT) {
-		free_atid(cdev, tid);
-#ifdef notyet		
-		__skb_queue_purge(&tp->out_of_order_queue);
-#endif		
-	} else {                                          // we have TID
-		cxgb_remove_tid(cdev, toep, tid);
-		toepcb_release(toep);
-	}
-#if 0
-	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
-#endif
-}
 
-static void
-install_offload_ops(struct socket *so)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
+	if (toep->tp_tid >= 0)
+		release_tid(tod, toep->tp_tid, toep->tp_qset);
 
-	KASSERT(tp->t_toe != NULL, ("toepcb not set"));
-	
-	t3_install_socket_ops(so);
-	tp->t_flags |= TF_TOE;
-	tp->t_tu = &cxgb_toe_usrreqs;
-}
-
-/*
- * Determine the receive window scaling factor given a target max
- * receive window.
- */
-static __inline int
-select_rcv_wscale(int space, struct vnet *vnet)
-{
-	int wscale = 0;
-
-	if (space > MAX_RCV_WND)
-		space = MAX_RCV_WND;
-
-	if (V_tcp_do_rfc1323)
-		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
-
-	return (wscale);
+	toepcb_free(toep);
 }
 
 /*
  * Determine the receive window size for a socket.
  */
-static unsigned long
-select_rcv_wnd(struct toedev *dev, struct socket *so)
+unsigned long
+select_rcv_wnd(struct socket *so)
 {
-	struct tom_data *d = TOM_DATA(dev);
-	unsigned int wnd;
-	unsigned int max_rcv_wnd;
-	struct sockbuf *rcv;
+	unsigned long wnd;
 
-	rcv = so_sockbuf_rcv(so);
-	
-	if (V_tcp_do_autorcvbuf)
-		wnd = V_tcp_autorcvbuf_max;
-	else
-		wnd = rcv->sb_hiwat;
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
-	
-	
-	/* XXX
-	 * For receive coalescing to work effectively we need a receive window
-	 * that can accomodate a coalesced segment.
-	 */	
+	wnd = sbspace(&so->so_rcv);
 	if (wnd < MIN_RCV_WND)
-		wnd = MIN_RCV_WND; 
-	
-	/* PR 5138 */
-	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ? 
-				    (uint32_t)d->rx_page_size * 23 :
-				    MAX_RCV_WND);
-	
-	return min(wnd, max_rcv_wnd);
+		wnd = MIN_RCV_WND;
+
+	return min(wnd, MAX_RCV_WND);
+}
+
+int
+select_rcv_wscale(void)
+{
+	int wscale = 0;
+	unsigned long space = sb_max;
+
+	if (space > MAX_RCV_WND)
+		space = MAX_RCV_WND;
+
+	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
+		wscale++;
+
+	return (wscale);
+}
+
+
+/*
+ * Set up the socket for TCP offload.
+ */
+void
+offload_socket(struct socket *so, struct toepcb *toep)
+{
+	struct toedev *tod = toep->tp_tod;
+	struct tom_data *td = t3_tomdata(tod);
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+
+	INP_WLOCK_ASSERT(inp);
+
+	/* Update socket */
+	SOCKBUF_LOCK(&so->so_snd);
+	so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE;
+	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCKBUF_LOCK(&so->so_rcv);
+	so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+
+	/* Update TCP PCB */
+	tp->tod = toep->tp_tod;
+	tp->t_toe = toep;
+	tp->t_flags |= TF_TOE;
+
+	/* Install an extra hold on inp */
+	toep->tp_inp = inp;
+	toep->tp_flags |= TP_ATTACHED;
+	in_pcbref(inp);
+
+	/* Add the TOE PCB to the active list */
+	mtx_lock(&td->toep_list_lock);
+	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
+	mtx_unlock(&td->toep_list_lock);
+}
+
+/* This is _not_ the normal way to "unoffload" a socket. */
+void
+undo_offload_socket(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toepcb *toep = tp->t_toe;
+	struct toedev *tod = toep->tp_tod;
+	struct tom_data *td = t3_tomdata(tod);
+
+	INP_WLOCK_ASSERT(inp);
+
+	so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE;
+	so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE;
+
+	tp->tod = NULL;
+	tp->t_toe = NULL;
+	tp->t_flags &= ~TF_TOE;
+
+	toep->tp_inp = NULL;
+	toep->tp_flags &= ~TP_ATTACHED;
+	if (in_pcbrele_wlocked(inp))
+		panic("%s: inp freed.", __func__);
+
+	mtx_lock(&td->toep_list_lock);
+	TAILQ_REMOVE(&td->toep_list, toep, link);
+	mtx_unlock(&td->toep_list_lock);
 }
 
 /*
- * Assign offload parameters to some socket fields.  This code is used by
- * both active and passive opens.
+ * Socket could be a listening socket, and we may not have a toepcb at all at
+ * this time.
  */
-static inline void
-init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
-    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
+uint32_t
+calc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e)
 {
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
-	struct sockbuf *snd, *rcv;
-	
-#ifdef notyet	
-	SOCK_LOCK_ASSERT(so);
-#endif
-	
-	snd = so_sockbuf_snd(so);
-	rcv = so_sockbuf_rcv(so);
-	
-	log(LOG_INFO, "initializing offload socket\n");
-	/*
-	 * We either need to fix push frames to work with sbcompress
-	 * or we need to add this
-	 */
-	snd->sb_flags |= SB_NOCOALESCE;
-	rcv->sb_flags |= SB_NOCOALESCE;
-	
-	tp->t_toe = toep;
-	toep->tp_tp = tp;
-	toep->tp_toedev = dev;
-	
-	toep->tp_tid = tid;
-	toep->tp_l2t = e;
-	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
-	toep->tp_wr_unacked = 0;
-	toep->tp_delack_mode = 0;
-	
-	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
-	/*
-	 * XXX broken
-	 * 
-	 */
-	tp->rcv_wnd = select_rcv_wnd(dev, so);
+	uint32_t opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) |
+	    V_MSS_IDX(mtu_idx);
 
-        toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
-		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
-	toep->tp_qset_idx = 0;
-	
-	reset_wr_list(toep);
-	DPRINTF("initialization done\n");
+	if (so != NULL) {
+		struct inpcb *inp = sotoinpcb(so);
+		struct tcpcb *tp = intotcpcb(inp);
+		int keepalive = always_keepalive ||
+		    so_options_get(so) & SO_KEEPALIVE;
+
+		opt0h |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
+		opt0h |= V_KEEP_ALIVE(keepalive != 0);
+	}
+
+	if (e != NULL)
+		opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx);
+
+	return (htobe32(opt0h));
 }
 
-/*
- * The next two functions calculate the option 0 value for a socket.
- */
-static inline unsigned int
-calc_opt0h(struct socket *so, int mtu_idx)
+uint32_t
+calc_opt0l(struct socket *so, int rcv_bufsize)
 {
-	struct tcpcb *tp = so_sototcpcb(so);
-	int wscale = select_rcv_wscale(tp->rcv_wnd, so->so_vnet);
-	
-	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
-	    V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
-	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
+	uint32_t opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize);
+
+	KASSERT(rcv_bufsize <= M_RCV_BUFSIZ,
+	    ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize));
+
+	if (so != NULL)		/* optional because noone cares about IP TOS */
+		opt0l |= V_TOS(INP_TOS(sotoinpcb(so)));
+
+	return (htobe32(opt0l));
 }
 
-static inline unsigned int
-calc_opt0l(struct socket *so, int ulp_mode)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	unsigned int val;
-	
-	val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
-	       V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
-
-	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
-	return (val);
-}
-
-static inline unsigned int
-calc_opt2(const struct socket *so, struct toedev *dev)
-{
-	int flv_valid;
-
-	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
-
-	return (V_FLAVORS_VALID(flv_valid) |
-	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
-}
-
-#if DEBUG_WR > 1
-static int
-count_pending_wrs(const struct toepcb *toep)
-{
-	const struct mbuf *m;
-	int n = 0;
-
-	wr_queue_walk(toep, m)
-		n += m->m_pkthdr.csum_data;
-	return (n);
-}
-#endif
-
-#if 0
-(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
-#endif
-	
-static void
-mk_act_open_req(struct socket *so, struct mbuf *m,
-    unsigned int atid, const struct l2t_entry *e)
-{
-	struct cpl_act_open_req *req;
-	struct inpcb *inp = so_sotoinpcb(so);
-	struct tcpcb *tp = inp_inpcbtotcpcb(inp);
-	struct toepcb *toep = tp->t_toe;
-	struct toedev *tdev = toep->tp_toedev;
-	
-	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
-	
-	req = mtod(m, struct cpl_act_open_req *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = 0;
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
-	inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
-#if 0	
-	req->local_port = inp->inp_lport;
-	req->peer_port = inp->inp_fport;
-	memcpy(&req->local_ip, &inp->inp_laddr, 4);
-	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
-#endif	
-	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
-			   V_TX_CHANNEL(e->smt_idx));
-	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
-	req->params = 0;
-	req->opt2 = htonl(calc_opt2(so, tdev));
-}
-
-
 /*
  * Convert an ACT_OPEN_RPL status to an errno.
  */
@@ -1422,61 +889,6 @@
 	}
 }
 
-static void
-fail_act_open(struct toepcb *toep, int errno)
-{
-	struct tcpcb *tp = toep->tp_tp;
-
-	t3_release_offload_resources(toep);
-	if (tp) {
-		inp_wunlock(tp->t_inpcb);		
-		tcp_offload_drop(tp, errno);
-	}
-	
-#ifdef notyet
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-#endif
-}
-
-/*
- * Handle active open failures.
- */
-static void
-active_open_failed(struct toepcb *toep, struct mbuf *m)
-{
-	struct cpl_act_open_rpl *rpl = cplhdr(m);
-	struct inpcb *inp;
-
-	if (toep->tp_tp == NULL)
-		goto done;
-
-	inp = toep->tp_tp->t_inpcb;
-
-/*
- * Don't handle connection retry for now
- */
-#ifdef notyet
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	if (rpl->status == CPL_ERR_CONN_EXIST &&
-	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
-		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
-		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
-			       jiffies + HZ / 2);
-	} else
-#endif
-	{
-		inp_wlock(inp);
-		/*
-		 * drops the inpcb lock
-		 */
-		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
-	}
-	
-	done:
-	m_free(m);
-}
-
 /*
  * Return whether a failed active open has allocated a TID
  */
@@ -1488,1072 +900,350 @@
 }
 
 /*
- * Process an ACT_OPEN_RPL CPL message.
+ * Active open failed.
  */
 static int
-do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct toepcb *toep = (struct toepcb *)ctx;
-	struct cpl_act_open_rpl *rpl = cplhdr(m);
-	
-	if (cdev->type != T3A && act_open_has_tid(rpl->status))
-		cxgb_queue_tid_release(cdev, GET_TID(rpl));
-	
-	active_open_failed(toep, m);
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	struct cpl_act_open_rpl *rpl = mtod(m, void *);
+	unsigned int atid = G_TID(ntohl(rpl->atid));
+	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp = intotcpcb(inp);
+	int s = rpl->status;
+
+	CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
+
+	free_atid(&td->tid_maps, atid);
+	toep->tp_tid = -1;
+
+	if (act_open_has_tid(s))
+		queue_tid_release(tod, GET_TID(rpl));
+
+	if (s == CPL_ERR_TCAM_FULL || s == CPL_ERR_CONN_EXIST) {
+		INP_WLOCK(inp);
+		toe_connect_failed(tod, tp, EAGAIN);
+		toepcb_release(toep);	/* unlocks inp */
+	} else {
+		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_WLOCK(inp);
+		toe_connect_failed(tod, tp, act_open_rpl_status_to_errno(s));
+		toepcb_release(toep);	/* unlocks inp */
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+	}
+
+	m_freem(m);
 	return (0);
 }
 
 /*
- * Handle an ARP failure for an active open.   XXX purge ofo queue
+ * Send an active open request.
  *
- * XXX badly broken for crossed SYNs as the ATID is no longer valid.
- * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
- * check SOCK_DEAD or sk->sk_sock.  Or maybe generate the error here but don't
- * free the atid.  Hmm.
- */
-#ifdef notyet
-static void
-act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
-{
-	struct toepcb *toep = m_get_toep(m);
-	struct tcpcb *tp = toep->tp_tp;
-	struct inpcb *inp = tp->t_inpcb;
-	struct socket *so;
-	
-	inp_wlock(inp);
-	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
-		/*
-		 * drops the inpcb lock
-		 */
-		fail_act_open(so, EHOSTUNREACH);
-		printf("freeing %p\n", m);
-		
-		m_free(m);
-	} else
-		inp_wunlock(inp);
-}
-#endif
-/*
- * Send an active open request.
+ * State of affairs on entry:
+ * soisconnecting (so_state |= SS_ISCONNECTING)
+ * tcbinfo not locked (this has changed - used to be WLOCKed)
+ * inp WLOCKed
+ * tp->t_state = TCPS_SYN_SENT
+ * rtalloc1, RT_UNLOCK on rt.
  */
 int
-t3_connect(struct toedev *tdev, struct socket *so,
+t3_connect(struct toedev *tod, struct socket *so,
     struct rtentry *rt, struct sockaddr *nam)
 {
-	struct mbuf *m;
-	struct l2t_entry *e;
-	struct tom_data *d = TOM_DATA(tdev);
-	struct inpcb *inp = so_sotoinpcb(so);
+	struct mbuf *m = NULL;
+	struct l2t_entry *e = NULL;
+	struct tom_data *td = t3_tomdata(tod);
+	struct adapter *sc = tod->tod_softc;
+	struct cpl_act_open_req *cpl;
+	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
-	struct toepcb *toep; /* allocated by init_offload_socket */
-		
-	int atid;
+	struct toepcb *toep;
+	int atid = -1, mtu_idx, rscale, cpu_idx, qset;
+	struct sockaddr *gw;
+	struct ifnet *ifp = rt->rt_ifp;
+	struct port_info *pi = ifp->if_softc;	/* XXX wrong for VLAN etc. */
 
-	toep = toepcb_alloc();
+	INP_WLOCK_ASSERT(inp);
+
+	toep = toepcb_alloc(tod);
 	if (toep == NULL)
-		goto out_err;
-	
-	if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
-		goto out_err;
-	
-	e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
-	if (!e)
-		goto free_tid;
+		goto failed;
 
-	inp_lock_assert(inp);
-	m = m_gethdr(MT_DATA, M_WAITOK);
-	
-#if 0	
-	m->m_toe.mt_toepcb = tp->t_toe;
-	set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
-#endif
-	so_lock(so);
-	
-	init_offload_socket(so, tdev, atid, e, rt, toep);
-	
-	install_offload_ops(so);
-	
-	mk_act_open_req(so, m, atid, e);
-	so_unlock(so);
-	
-	soisconnecting(so);
-	toep = tp->t_toe;
-	m_set_toep(m, tp->t_toe);
-	
-	toep->tp_state = TCPS_SYN_SENT;
-	l2t_send(d->cdev, (struct mbuf *)m, e);
+	atid = alloc_atid(&td->tid_maps, toep);
+	if (atid < 0)
+		goto failed;
 
-	if (toep->tp_ulp_mode)
-		t3_enable_ddp(toep, 0);
-	return 	(0);
-	
-free_tid:
-	printf("failing connect - free atid\n");
-	
-	free_atid(d->cdev, atid);
-out_err:
-	printf("return ENOMEM\n");
-       return (ENOMEM);
+	qset = pi->first_qset + (arc4random() % pi->nqsets);
+
+	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
+	if (m == NULL)
+		goto failed;
+
+	gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam;
+	e = t3_l2t_get(pi, ifp, gw);
+	if (e == NULL)
+		goto failed;
+
+	toep->tp_l2t = e;
+	toep->tp_tid = atid;	/* used to double check response */
+	toep->tp_qset = qset;
+
+	SOCKBUF_LOCK(&so->so_rcv);
+	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
+	toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+
+	offload_socket(so, toep);
+
+	/*
+	 * The kernel sets request_r_scale based on sb_max whereas we need to
+	 * take hardware's MAX_RCV_WND into account too.  This is normally a
+	 * no-op as MAX_RCV_WND is much larger than the default sb_max.
+	 */
+	if (tp->t_flags & TF_REQ_SCALE)
+		rscale = tp->request_r_scale = select_rcv_wscale();
+	else
+		rscale = 0;
+	mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
+	cpu_idx = sc->rrss_map[qset];
+
+	cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
+	cpl->wr.wrh_lo = 0;
+	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); 
+	inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
+	    &cpl->peer_port);
+	cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e);
+	cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits);
+	cpl->params = 0;
+	cpl->opt2 = calc_opt2(cpu_idx);
+
+	CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__,
+	    toep->tp_tid, tcpstates[tp->t_state], toep, inp);
+
+	if (l2t_send(sc, m, e) == 0)
+		return (0);
+
+	undo_offload_socket(so);
+
+failed:
+	CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p",
+	    __func__, atid, toep, e, m);
+
+	if (atid >= 0)
+		free_atid(&td->tid_maps, atid);
+
+	if (e)
+		l2t_release(td->l2t, e);
+
+	if (toep)
+		toepcb_free(toep);
+
+	m_freem(m);
+
+	return (ENOMEM);
 }
 
 /*
- * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do
- * not send multiple ABORT_REQs for the same connection and also that we do
- * not try to send a message after the connection has closed.  Returns 1 if
- * an ABORT_REQ wasn't generated after all, 0 otherwise.
+ * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do not
+ * send multiple ABORT_REQs for the same connection and also that we do not try
+ * to send a message after the connection has closed.
  */
 static void
-t3_send_reset(struct toepcb *toep)
+send_reset(struct toepcb *toep)
 {
-	
+
 	struct cpl_abort_req *req;
 	unsigned int tid = toep->tp_tid;
-	int mode = CPL_ABORT_SEND_RST;
-	struct tcpcb *tp = toep->tp_tp;
-	struct toedev *tdev = toep->tp_toedev;
-	struct socket *so = NULL;
+	struct inpcb *inp = toep->tp_inp;
+	struct socket *so = inp->inp_socket;
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toedev *tod = toep->tp_tod;
+	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
-	struct sockbuf *snd;
-	
-	if (tp) {
-		inp_lock_assert(tp->t_inpcb);
-		so = inp_inpcbtosocket(tp->t_inpcb);
-	}
-	
-	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
-		tdev == NULL))
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
+	    toep->tp_flags);
+
+	if (toep->tp_flags & TP_ABORT_SHUTDOWN)
 		return;
-	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
 
-	snd = so_sockbuf_snd(so);
-	/* Purge the send queue so we don't send anything after an abort. */
-	if (so)
-		sbflush(snd);
-	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
-		mode |= CPL_ABORT_POST_CLOSE_REQ;
+	toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
 
-	m = m_gethdr_nofail(sizeof(*req));
-	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
-	set_arp_failure_handler(m, abort_arp_failure);
+	/* Purge the send queue */
+	sbflush(so_sockbuf_snd(so));
+	purge_wr_queue(toep);
 
-	req = mtod(m, struct cpl_abort_req *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
-	req->wr.wr_lo = htonl(V_WR_TID(tid));
+	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
+	if (m == NULL)
+		CXGB_UNIMPLEMENTED();
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+	req->wr.wrh_lo = htonl(V_WR_TID(tid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
-	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
+	req->rsvd0 = htonl(tp->snd_nxt);
 	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
-	req->cmd = mode;
-	if (tp && (tp->t_state == TCPS_SYN_SENT))
-		mbufq_tail(&toep->out_of_order_queue, m);	// defer
+	req->cmd = CPL_ABORT_SEND_RST;
+
+	if (tp->t_state == TCPS_SYN_SENT)
+		mbufq_tail(&toep->out_of_order_queue, m); /* defer */
 	else
-		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
-}
-
-static int
-t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
-{
-	struct inpcb *inp;
-	int error, optval;
-	
-	if (sopt->sopt_name == IP_OPTIONS)
-		return (ENOPROTOOPT);
-
-	if (sopt->sopt_name != IP_TOS)
-		return (EOPNOTSUPP);
-	
-	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
-
-	if (error)
-		return (error);
-
-	if (optval > IPTOS_PREC_CRITIC_ECP)
-		return (EINVAL);
-
-	inp = so_sotoinpcb(so);
-	inp_wlock(inp);
-	inp_ip_tos_set(inp, optval);
-#if 0	
-	inp->inp_ip_tos = optval;
-#endif
-	t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
-	inp_wunlock(inp);
-
-	return (0);
-}
-
-static int
-t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
-{
-	int err = 0;
-	size_t copied;
-
-	if (sopt->sopt_name != TCP_CONGESTION &&
-	    sopt->sopt_name != TCP_NODELAY)
-		return (EOPNOTSUPP);
-
-	if (sopt->sopt_name == TCP_CONGESTION) {
-		char name[TCP_CA_NAME_MAX];
-		int optlen = sopt->sopt_valsize;
-		struct tcpcb *tp;
-		
-		if (sopt->sopt_dir == SOPT_GET) {
-			KASSERT(0, ("unimplemented"));
-			return (EOPNOTSUPP);
-		}
-
-		if (optlen < 1)
-			return (EINVAL);
-		
-		err = copyinstr(sopt->sopt_val, name, 
-		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
-		if (err)
-			return (err);
-		if (copied < 1)
-			return (EINVAL);
-
-		tp = so_sototcpcb(so);
-		/*
-		 * XXX I need to revisit this
-		 */
-		if ((err = t3_set_cong_control(so, name)) == 0) {
-#ifdef CONGESTION_CONTROL_SUPPORTED
-			tp->t_cong_control = strdup(name, M_CXGB);
-#endif			
-		} else
-			return (err);
-	} else {
-		int optval, oldval;
-		struct inpcb *inp;
-		struct tcpcb *tp;
-
-		if (sopt->sopt_dir == SOPT_GET)
-			return (EOPNOTSUPP);
-	
-		err = sooptcopyin(sopt, &optval, sizeof optval,
-		    sizeof optval);
-
-		if (err)
-			return (err);
-
-		inp = so_sotoinpcb(so);
-		inp_wlock(inp);
-		tp = inp_inpcbtotcpcb(inp);
-
-		oldval = tp->t_flags;
-		if (optval)
-			tp->t_flags |= TF_NODELAY;
-		else
-			tp->t_flags &= ~TF_NODELAY;
-		inp_wunlock(inp);
-
-
-		if (oldval != tp->t_flags && (tp->t_toe != NULL))
-			t3_set_nagle(tp->t_toe);
-
-	}
-
-	return (0);
+		l2t_send(sc, m, toep->tp_l2t);
 }
 
 int
-t3_ctloutput(struct socket *so, struct sockopt *sopt)
+t3_send_rst(struct toedev *tod __unused, struct tcpcb *tp)
 {
-	int err;
 
-	if (sopt->sopt_level != IPPROTO_TCP) 
-		err =  t3_ip_ctloutput(so, sopt);
-	else
-		err = t3_tcp_ctloutput(so, sopt);
-
-	if (err != EOPNOTSUPP)
-		return (err);
-
-	return (tcp_ctloutput(so, sopt));
-}
-
-/*
- * Returns true if we need to explicitly request RST when we receive new data
- * on an RX-closed connection.
- */
-static inline int
-need_rst_on_excess_rx(const struct toepcb *toep)
-{
-	return (1);
-}
-
-/*
- * Handles Rx data that arrives in a state where the socket isn't accepting
- * new data.
- */
-static void
-handle_excess_rx(struct toepcb *toep, struct mbuf *m)
-{
-	
-	if (need_rst_on_excess_rx(toep) &&
-	    !(toep->tp_flags & TP_ABORT_SHUTDOWN))
-		t3_send_reset(toep);
-	m_freem(m); 
-}
-
-/*
- * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
- * by getting the DDP offset from the TCB.
- */
-static void
-tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
-{
-	struct ddp_state *q = &toep->tp_ddp_state;
-	struct ddp_buf_state *bsp;
-	struct cpl_get_tcb_rpl *hdr;
-	unsigned int ddp_offset;
-	struct socket *so;
-	struct tcpcb *tp;
-	struct sockbuf *rcv;	
-	int state;
-	
-	uint64_t t;
-	__be64 *tcb;
-
-	tp = toep->tp_tp;
-	so = inp_inpcbtosocket(tp->t_inpcb);
-
-	inp_lock_assert(tp->t_inpcb);
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);	
-	
-	/* Note that we only accout for CPL_GET_TCB issued by the DDP code.
-	 * We really need a cookie in order to dispatch the RPLs.
-	 */
-	q->get_tcb_count--;
-
-	/* It is a possible that a previous CPL already invalidated UBUF DDP
-	 * and moved the cur_buf idx and hence no further processing of this
-	 * skb is required. However, the app might be sleeping on
-	 * !q->get_tcb_count and we need to wake it up.
-	 */
-	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
-		int state = so_state_get(so);
-
-		m_freem(m);
-		if (__predict_true((state & SS_NOFDREF) == 0))
-			so_sorwakeup_locked(so);
-		else
-			sockbuf_unlock(rcv);
-
-		return;
-	}
-
-	bsp = &q->buf_state[q->cur_buf];
-	hdr = cplhdr(m);
-	tcb = (__be64 *)(hdr + 1);
-	if (q->cur_buf == 0) {
-		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
-		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
-	} else {
-		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
-		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
-	}
-	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
-	m->m_cur_offset = bsp->cur_offset;
-	bsp->cur_offset = ddp_offset;
-	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
-
-	CTR5(KTR_TOM,
-	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
-	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
-	KASSERT(ddp_offset >= m->m_cur_offset,
-	    ("ddp_offset=%u less than cur_offset=%u",
-		ddp_offset, m->m_cur_offset));
-	
-#if 0
-{
-	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
-
-	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
-	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
-
-        t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
-        rcv_nxt = t >> S_TCB_RCV_NXT;
-        rcv_nxt &= M_TCB_RCV_NXT;
-
-        t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
-        rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
-        rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
-
-	T3_TRACE2(TIDTB(sk),
-		  "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
-		  ddp_flags, rcv_nxt - rx_hdr_offset);
-	T3_TRACE4(TB(q),
-		  "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
-		  tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
-	T3_TRACE3(TB(q),
-		  "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
-		  rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
-	T3_TRACE2(TB(q),
-		  "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
-		 q->buf_state[0].flags, q->buf_state[1].flags);
-
-}
-#endif
-	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
-		handle_excess_rx(toep, m);
-		return;
-	}
-
-#ifdef T3_TRACE
-	if ((int)m->m_pkthdr.len < 0) {
-		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
-	}
-#endif
-	if (bsp->flags & DDP_BF_NOCOPY) {
-#ifdef T3_TRACE
-		T3_TRACE0(TB(q),
-			  "tcb_rpl_as_ddp_complete: CANCEL UBUF");
-
-		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
-			printk("!cancel_ubuf");
-			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
-		}
-#endif
-		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
-		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
-		q->cur_buf ^= 1;
-	} else if (bsp->flags & DDP_BF_NOFLIP) {
-
-		m->m_ddp_flags = 1;    /* always a kernel buffer */
-
-		/* now HW buffer carries a user buffer */
-		bsp->flags &= ~DDP_BF_NOFLIP;
-		bsp->flags |= DDP_BF_NOCOPY;
-
-		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
-		 * any new data in which case we're done. If in addition the
-		 * offset is 0, then there wasn't a completion for the kbuf
-		 * and we need to decrement the posted count.
-		 */
-		if (m->m_pkthdr.len == 0) {
-			if (ddp_offset == 0) {
-				q->kbuf_posted--;
-				bsp->flags |= DDP_BF_NODATA;
-			}
-			sockbuf_unlock(rcv);
-			m_free(m);
-			return;
-		}
-	} else {
-		sockbuf_unlock(rcv);
-
-		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
-		 * but it got here way late and nobody cares anymore.
-		 */
-		m_free(m);
-		return;
-	}
-
-	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_flags |= M_DDP;
-	m->m_seq = tp->rcv_nxt;
-	tp->rcv_nxt += m->m_pkthdr.len;
-	tp->t_rcvtime = ticks;
-	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
-		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
-	if (m->m_pkthdr.len == 0) {
-		q->user_ddp_pending = 0;
-		m_free(m);
-	} else 
-		SBAPPEND(rcv, m);
-
-	state = so_state_get(so);	
-	if (__predict_true((state & SS_NOFDREF) == 0))
-		so_sorwakeup_locked(so);
-	else
-		sockbuf_unlock(rcv);
-}
-
-/*
- * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
- * in that case they are similar to DDP completions.
- */
-static int
-do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct toepcb *toep = (struct toepcb *)ctx;
-
-	/* OK if socket doesn't exist */
-	if (toep == NULL) {
-		printf("null toep in do_get_tcb_rpl\n");
-		return (CPL_RET_BUF_DONE);
-	}
-
-	inp_wlock(toep->tp_tp->t_inpcb);
-	tcb_rpl_as_ddp_complete(toep, m);
-	inp_wunlock(toep->tp_tp->t_inpcb);
-	
+	send_reset(tp->t_toe);
 	return (0);
 }
 
-static void
-handle_ddp_data(struct toepcb *toep, struct mbuf *m)
-{
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so;
-	struct ddp_state *q;
-	struct ddp_buf_state *bsp;
-	struct cpl_rx_data *hdr = cplhdr(m);
-	unsigned int rcv_nxt = ntohl(hdr->seq);
-	struct sockbuf *rcv;	
-	
-	if (tp->rcv_nxt == rcv_nxt)
-		return;
-
-	inp_lock_assert(tp->t_inpcb);
-	so  = inp_inpcbtosocket(tp->t_inpcb);
-	rcv = so_sockbuf_rcv(so);	
-	sockbuf_lock(rcv);	
-
-	q = &toep->tp_ddp_state;
-	bsp = &q->buf_state[q->cur_buf];
-	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
-		rcv_nxt, tp->rcv_nxt));
-	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
-	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
-	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
-	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
-
-#ifdef T3_TRACE
-	if ((int)m->m_pkthdr.len < 0) {
-		t3_ddp_error(so, "handle_ddp_data: neg len");
-	}
-#endif
-	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_flags |= M_DDP;
-	m->m_cur_offset = bsp->cur_offset;
-	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
-	if (bsp->flags & DDP_BF_NOCOPY)
-		bsp->flags &= ~DDP_BF_NOCOPY;
-
-	m->m_seq = tp->rcv_nxt;
-	tp->rcv_nxt = rcv_nxt;
-	bsp->cur_offset += m->m_pkthdr.len;
-	if (!(bsp->flags & DDP_BF_NOFLIP))
-		q->cur_buf ^= 1;
-	/*
-	 * For now, don't re-enable DDP after a connection fell out of  DDP
-	 * mode.
-	 */
-	q->ubuf_ddp_ready = 0;
-	sockbuf_unlock(rcv);
-}
-
-/*
- * Process new data received for a connection.
- */
-static void
-new_rx_data(struct toepcb *toep, struct mbuf *m)
-{
-	struct cpl_rx_data *hdr = cplhdr(m);
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so;
-	struct sockbuf *rcv;	
-	int state;
-	int len = be16toh(hdr->len);
-
-	inp_wlock(tp->t_inpcb);
-
-	so  = inp_inpcbtosocket(tp->t_inpcb);
-	
-	if (__predict_false(so_no_receive(so))) {
-		handle_excess_rx(toep, m);
-		inp_wunlock(tp->t_inpcb);
-		TRACE_EXIT;
-		return;
-	}
-
-	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
-		handle_ddp_data(toep, m);
-	
-	m->m_seq = ntohl(hdr->seq);
-	m->m_ulp_mode = 0;                    /* for iSCSI */
-
-#if VALIDATE_SEQ
-	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
-		log(LOG_ERR,
-		       "%s: TID %u: Bad sequence number %u, expected %u\n",
-		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
-		       tp->rcv_nxt);
-		m_freem(m);
-		inp_wunlock(tp->t_inpcb);
-		return;
-	}
-#endif
-	m_adj(m, sizeof(*hdr));
-
-#ifdef URGENT_DATA_SUPPORTED
-	/*
-	 * We don't handle urgent data yet
-	 */
-	if (__predict_false(hdr->urg))
-		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
-	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
-		     tp->urg_seq - tp->rcv_nxt < skb->len))
-		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
-							 tp->rcv_nxt];
-#endif	
-	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
-		toep->tp_delack_mode = hdr->dack_mode;
-		toep->tp_delack_seq = tp->rcv_nxt;
-	}
-	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
-	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
-	
-	if (len < m->m_pkthdr.len)
-		m->m_pkthdr.len = m->m_len = len;
-
-	tp->rcv_nxt += m->m_pkthdr.len;
-	tp->t_rcvtime = ticks;
-	toep->tp_enqueued_bytes += m->m_pkthdr.len;
-	CTR2(KTR_TOM,
-	    "new_rx_data: seq 0x%x len %u",
-	    m->m_seq, m->m_pkthdr.len);
-	inp_wunlock(tp->t_inpcb);
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);
-#if 0	
-	if (sb_notify(rcv))
-		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
-#endif
-	SBAPPEND(rcv, m);
-
-#ifdef notyet
-	/*
-	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
-	 *
-	 */
-	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
-
-	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
-		so, rcv->sb_cc, rcv->sb_mbmax));
-#endif
-	
-
-	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
-	    rcv->sb_cc, rcv->sb_mbcnt);
-	
-	state = so_state_get(so);	
-	if (__predict_true((state & SS_NOFDREF) == 0))
-		so_sorwakeup_locked(so);
-	else
-		sockbuf_unlock(rcv);
-}
-
 /*
  * Handler for RX_DATA CPL messages.
  */
 static int
-do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct toepcb *toep = (struct toepcb *)ctx;
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_rx_data *hdr = mtod(m, void *);
+	unsigned int tid = GET_TID(hdr);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp;
+	struct socket *so;
+	struct sockbuf *so_rcv;	
 
-	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
-	
-	new_rx_data(toep, m);
+	/* Advance over CPL */
+	m_adj(m, sizeof(*hdr));
 
-	return (0);
-}
-
-static void
-new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
-{
-	struct tcpcb *tp;
-	struct ddp_state *q;
-	struct ddp_buf_state *bsp;
-	struct cpl_rx_data_ddp *hdr;
-	struct socket *so;	
-	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
-	int nomoredata = 0;
-	unsigned int delack_mode;
-	struct sockbuf *rcv;
-	
-	tp = toep->tp_tp;	
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(tp->t_inpcb);
-
-	if (__predict_false(so_no_receive(so))) {
-
-		handle_excess_rx(toep, m);
-		inp_wunlock(tp->t_inpcb);
-		return;
-	}
-	
-	q = &toep->tp_ddp_state;
-	hdr = cplhdr(m);
-	ddp_report = ntohl(hdr->u.ddp_report);
-	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
-	bsp = &q->buf_state[buf_idx];
-
-	CTR4(KTR_TOM,
-	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
-	    "hdr seq 0x%x len %u",
-	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
-	    ntohs(hdr->len));
-	CTR3(KTR_TOM,
-	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
-	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
-	
-	ddp_len = ntohs(hdr->len);
-	rcv_nxt = ntohl(hdr->seq) + ddp_len;
-
-	delack_mode = G_DDP_DACK_MODE(ddp_report);
-	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
-		toep->tp_delack_mode = delack_mode;
-		toep->tp_delack_seq = tp->rcv_nxt;
-	}
-	
-	m->m_seq = tp->rcv_nxt;
-	tp->rcv_nxt = rcv_nxt;
-
-	tp->t_rcvtime = ticks;
-	/*
-	 * Store the length in m->m_len.  We are changing the meaning of
-	 * m->m_len here, we need to be very careful that nothing from now on
-	 * interprets ->len of this packet the usual way.
-	 */
-	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
-	inp_wunlock(tp->t_inpcb);
-	CTR3(KTR_TOM,
-	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
-	    m->m_len, rcv_nxt, m->m_seq);
-	/*
-	 * Figure out where the new data was placed in the buffer and store it
-	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
-	 * account for page pod's pg_offset.
-	 */
-	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
-	m->m_cur_offset = end_offset - m->m_pkthdr.len;
-
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);	
-
-	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_flags |= M_DDP;
-	bsp->cur_offset = end_offset;
-	toep->tp_enqueued_bytes += m->m_pkthdr.len;
-
-	/*
-	 * Length is only meaningful for kbuf
-	 */
-	if (!(bsp->flags & DDP_BF_NOCOPY))
-		KASSERT(m->m_len <= bsp->gl->dgl_length,
-		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
-			m->m_len, bsp->gl->dgl_length));
-
-	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
-	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
-        /*
-	 * Bit 0 of flags stores whether the DDP buffer is completed.
-	 * Note that other parts of the code depend on this being in bit 0.
-	 */
-	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
-		panic("spurious ddp completion");
-	} else {
-		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
-		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) 
-			q->cur_buf ^= 1;                     /* flip buffers */
+	/* XXX: revisit.  This comes from the T4 TOM */
+	if (__predict_false(inp == NULL)) {
+		/*
+		 * do_pass_establish failed and must be attempting to abort the
+		 * connection.  Meanwhile, the T4 has sent us data for such a
+		 * connection.
+		 */
+#ifdef notyet
+		KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
+		    ("%s: inp NULL and tid isn't being aborted", __func__));
+#endif
+		m_freem(m);
+		return (0);
 	}
 
-	if (bsp->flags & DDP_BF_NOCOPY) {
-		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
-		bsp->flags &= ~DDP_BF_NOCOPY;
+	INP_WLOCK(inp);
+	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
+		CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
+		    __func__, tid, m->m_pkthdr.len, inp->inp_flags);
+		INP_WUNLOCK(inp);
+		m_freem(m);
+		return (0);
 	}
 
-	if (ddp_report & F_DDP_PSH)
-		m->m_ddp_flags |= DDP_BF_PSH;
-	if (nomoredata)
-		m->m_ddp_flags |= DDP_BF_NODATA;
+	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode))
+		toep->tp_delack_mode = hdr->dack_mode;
 
-#ifdef notyet	
-	skb_reset_transport_header(skb);
-	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
+	tp = intotcpcb(inp);
+
+#ifdef INVARIANTS
+	if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) {
+		log(LOG_ERR,
+		    "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
+		    __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt);
+	}
 #endif
-	SBAPPEND(rcv, m);
-
-	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
-	    (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
-		|| !(m->m_ddp_flags & DDP_BF_NOCOPY))))
-		so_sorwakeup_locked(so);
-	else
-		sockbuf_unlock(rcv);
-}
-
-#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
-		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
-		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
-		 F_DDP_INVALID_PPOD)
-
-/*
- * Handler for RX_DATA_DDP CPL messages.
- */
-static int
-do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct toepcb *toep = ctx;
-	const struct cpl_rx_data_ddp *hdr = cplhdr(m);
-
-	VALIDATE_SOCK(so);
-
-	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
-		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
-		       GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
-		return (CPL_RET_BUF_DONE);
-	}
-#if 0
-	skb->h.th = tcphdr_skb->h.th;
-#endif	
-	new_rx_data_ddp(toep, m);
-	return (0);
-}
-
-static void
-process_ddp_complete(struct toepcb *toep, struct mbuf *m)
-{
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so;
-	struct ddp_state *q;
-	struct ddp_buf_state *bsp;
-	struct cpl_rx_ddp_complete *hdr;
-	unsigned int ddp_report, buf_idx, when, delack_mode;
-	int nomoredata = 0;
-	struct sockbuf *rcv;
-	
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(tp->t_inpcb);
-
-	if (__predict_false(so_no_receive(so))) {
-		struct inpcb *inp = so_sotoinpcb(so);
-
-		handle_excess_rx(toep, m);
-		inp_wunlock(inp);
-		return;
-	}
-	q = &toep->tp_ddp_state; 
-	hdr = cplhdr(m);
-	ddp_report = ntohl(hdr->ddp_report);
-	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
-	m->m_pkthdr.csum_data = tp->rcv_nxt;
-
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);
-
-	bsp = &q->buf_state[buf_idx];
-	when = bsp->cur_offset;
-	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
-	tp->rcv_nxt += m->m_len;
+	tp->rcv_nxt += m->m_pkthdr.len;
+	KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
+	    ("%s: negative window size", __func__));
+	tp->rcv_wnd -= m->m_pkthdr.len;
 	tp->t_rcvtime = ticks;
 
-	delack_mode = G_DDP_DACK_MODE(ddp_report);
-	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
-		toep->tp_delack_mode = delack_mode;
-		toep->tp_delack_seq = tp->rcv_nxt;
+	so  = inp->inp_socket;
+	so_rcv = &so->so_rcv;
+	SOCKBUF_LOCK(so_rcv);
+
+	if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
+		CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)",
+		    __func__, tid, m->m_pkthdr.len);
+		SOCKBUF_UNLOCK(so_rcv);
+		INP_WUNLOCK(inp);
+
+		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_WLOCK(inp);
+		tp = tcp_drop(tp, ECONNRESET);
+		if (tp)
+			INP_WUNLOCK(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+
+		m_freem(m);
+		return (0);
 	}
-#ifdef notyet
-	skb_reset_transport_header(skb);
-	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
-#endif
-	inp_wunlock(tp->t_inpcb);
 
-	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
-	CTR5(KTR_TOM,
-		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
-		  "ddp_report 0x%x offset %u, len %u",
-		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
-		   G_DDP_OFFSET(ddp_report), m->m_len);
+	/* receive buffer autosize */
+	if (so_rcv->sb_flags & SB_AUTOSIZE &&
+	    V_tcp_do_autorcvbuf &&
+	    so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
+	    (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) {
+		unsigned int hiwat = so_rcv->sb_hiwat;
+		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
+		    V_tcp_autorcvbuf_max);
 
-	m->m_cur_offset = bsp->cur_offset;
-	bsp->cur_offset += m->m_len;
+		if (!sbreserve_locked(so_rcv, newsize, so, NULL))
+			so_rcv->sb_flags &= ~SB_AUTOSIZE;
+		else
+			toep->tp_rx_credits += newsize - hiwat;
+	}
 
-	if (!(bsp->flags & DDP_BF_NOFLIP)) {
-		q->cur_buf ^= 1;                     /* flip buffers */
-		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
-			nomoredata=1;
-	}
-		
-	CTR4(KTR_TOM,
-		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
-		  "ddp_report %u offset %u",
-		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
-		   G_DDP_OFFSET(ddp_report));
-	
-	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_flags |= M_DDP;
-	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
-	if (bsp->flags & DDP_BF_NOCOPY)
-		bsp->flags &= ~DDP_BF_NOCOPY;
-	if (nomoredata)
-		m->m_ddp_flags |= DDP_BF_NODATA;
+	toep->tp_enqueued += m->m_pkthdr.len;
+	sbappendstream_locked(so_rcv, m);
+	sorwakeup_locked(so);
+	SOCKBUF_UNLOCK_ASSERT(so_rcv);
 
-	SBAPPEND(rcv, m);
-	if ((so_state_get(so) & SS_NOFDREF) == 0)
-		so_sorwakeup_locked(so);
-	else
-		sockbuf_unlock(rcv);
-}
-
-/*
- * Handler for RX_DDP_COMPLETE CPL messages.
- */
-static int
-do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct toepcb *toep = ctx;
-
-	VALIDATE_SOCK(so);
-#if 0
-	skb->h.th = tcphdr_skb->h.th;
-#endif	
-	process_ddp_complete(toep, m);
+	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
- * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
- * socket state before calling tcp_time_wait to comply with its expectations.
+ * Handler for PEER_CLOSE CPL messages.
  */
-static void
-enter_timewait(struct tcpcb *tp)
+static int
+do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	/*
-	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
-	 * process peer_close because we don't want to carry the peer FIN in
-	 * the socket's receive queue and if we increment rcv_nxt without
-	 * having the FIN in the receive queue we'll confuse facilities such
-	 * as SIOCINQ.
-	 */
-	inp_wlock(tp->t_inpcb);	
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	const struct cpl_peer_close *hdr = mtod(m, void *);
+	unsigned int tid = GET_TID(hdr);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp;
+	struct socket *so;
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+
+	CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
+	    tid, tp ? tcpstates[tp->t_state] : "no tp" , toep->tp_flags, inp);
+
+	if (toep->tp_flags & TP_ABORT_RPL_PENDING)
+		goto done;
+
+	so = inp_inpcbtosocket(inp);
+
+	socantrcvmore(so);
 	tp->rcv_nxt++;
 
-	tp->ts_recent_age = 0;	     /* defeat recycling */
-	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
-	inp_wunlock(tp->t_inpcb);
-	tcp_offload_twstart(tp);
-}
-
-/*
- * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
- * function deals with the data that may be reported along with the FIN.
- * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
- * perform normal FIN-related processing.  In the latter case 1 indicates that
- * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
- * skb can be freed.
- */
-static int
-handle_peer_close_data(struct socket *so, struct mbuf *m)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	struct ddp_state *q;
-	struct ddp_buf_state *bsp;
-	struct cpl_peer_close *req = cplhdr(m);
-	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
-	struct sockbuf *rcv;
-	
-	if (tp->rcv_nxt == rcv_nxt)			/* no data */
-		return (0);
-
-	CTR0(KTR_TOM, "handle_peer_close_data");
-	if (__predict_false(so_no_receive(so))) {
-		handle_excess_rx(toep, m);
-
-		/*
-		 * Although we discard the data we want to process the FIN so
-		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
-		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
-		 * may be what will close the connection.  We return 1 because
-		 * handle_excess_rx() already freed the packet.
-		 */
-		return (1);
-	}
-
-	inp_lock_assert(tp->t_inpcb);
-	q = &toep->tp_ddp_state;
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);
-
-	bsp = &q->buf_state[q->cur_buf];
-	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
-	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
-	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_flags |= M_DDP;
-	m->m_cur_offset = bsp->cur_offset;
-	m->m_ddp_flags = 
-	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
-	m->m_seq = tp->rcv_nxt;
-	tp->rcv_nxt = rcv_nxt;
-	bsp->cur_offset += m->m_pkthdr.len;
-	if (!(bsp->flags & DDP_BF_NOFLIP))
-		q->cur_buf ^= 1;
-#ifdef notyet	
-	skb_reset_transport_header(skb);
-	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
-#endif	
-	tp->t_rcvtime = ticks;
-	SBAPPEND(rcv, m);
-	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
-		so_sorwakeup_locked(so);
-	else
-		sockbuf_unlock(rcv);
-
-	return (1);
-}
-
-/*
- * Handle a peer FIN.
- */
-static void
-do_peer_fin(struct toepcb *toep, struct mbuf *m)
-{
-	struct socket *so;
-	struct tcpcb *tp = toep->tp_tp;
-	int keep, action;
-	
-	action = keep = 0;	
-	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
-	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
-		printf("abort_pending set\n");
-		
-		goto out;
-	}
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
-	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
-		keep = handle_peer_close_data(so, m);
-		if (keep < 0) {
-			inp_wunlock(tp->t_inpcb);					
-			return;
-		}
-	}
-	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
-		CTR1(KTR_TOM,
-		    "waking up waiters for cantrcvmore on %p ", so);	
-		socantrcvmore(so);
-
-		/*
-		 * If connection is half-synchronized
-		 * (ie NEEDSYN flag on) then delay ACK,
-		 * so it may be piggybacked when SYN is sent.
-		 * Otherwise, since we received a FIN then no
-		 * more input can be expected, send ACK now.
-		 */
-		if (tp->t_flags & TF_NEEDSYN)
-			tp->t_flags |= TF_DELACK;
-		else
-			tp->t_flags |= TF_ACKNOW;
-		tp->rcv_nxt++;
-	}
-	
 	switch (tp->t_state) {
 	case TCPS_SYN_RECEIVED:
-	    tp->t_starttime = ticks;
-	/* FALLTHROUGH */ 
+		tp->t_starttime = ticks;
+		/* FALLTHROUGH */ 
 	case TCPS_ESTABLISHED:
 		tp->t_state = TCPS_CLOSE_WAIT;
 		break;
@@ -2561,228 +1251,134 @@
 		tp->t_state = TCPS_CLOSING;
 		break;
 	case TCPS_FIN_WAIT_2:
-		/*
-		 * If we've sent an abort_req we must have sent it too late,
-		 * HW will send us a reply telling us so, and this peer_close
-		 * is really the last message for this connection and needs to
-		 * be treated as an abort_rpl, i.e., transition the connection
-		 * to TCP_CLOSE (note that the host stack does this at the
-		 * time of generating the RST but we must wait for HW).
-		 * Otherwise we enter TIME_WAIT.
-		 */
-		t3_release_offload_resources(toep);
-		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
-			action = TCP_CLOSE;
-		} else {
-			action = TCP_TIMEWAIT;			
-		}
-		break;
+		tcp_twstart(tp);
+		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+
+		INP_WLOCK(inp);
+		toepcb_release(toep);	/* no more CPLs expected */
+
+		m_freem(m);
+		return (0);
 	default:
-		log(LOG_ERR,
-		       "%s: TID %u received PEER_CLOSE in bad state %d\n",
-		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
-	}
-	inp_wunlock(tp->t_inpcb);					
-
-	if (action == TCP_TIMEWAIT) {
-		enter_timewait(tp);
-	} else if (action == TCP_DROP) {
-		tcp_offload_drop(tp, 0);		
-	} else if (action == TCP_CLOSE) {
-		tcp_offload_close(tp);		
+		log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n",
+		    __func__, toep->tp_tid, tp->t_state);
 	}
 
-#ifdef notyet		
-	/* Do not send POLL_HUP for half duplex close. */
-	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
-	    sk->sk_state == TCP_CLOSE)
-		sk_wake_async(so, 1, POLL_HUP);
-	else
-		sk_wake_async(so, 1, POLL_IN);
-#endif
+done:
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 
-out:
-	if (!keep)
-		m_free(m);
-}
-
-/*
- * Handler for PEER_CLOSE CPL messages.
- */
-static int
-do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct toepcb *toep = (struct toepcb *)ctx;
-
-	VALIDATE_SOCK(so);
-
-	do_peer_fin(toep, m);
-	return (0);
-}
-
-static void
-process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
-{
-	struct cpl_close_con_rpl *rpl = cplhdr(m);
-	struct tcpcb *tp = toep->tp_tp;	
-	struct socket *so;	
-	int action = 0;
-	struct sockbuf *rcv;	
-	
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(tp->t_inpcb);	
-	
-	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
-
-	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
-		inp_wunlock(tp->t_inpcb);
-		goto out;
-	}
-	
-	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep, 
-	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
-
-	switch (tp->t_state) {
-	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
-		t3_release_offload_resources(toep);
-		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
-			action = TCP_CLOSE;
-
-		} else {
-			action = TCP_TIMEWAIT;
-		}
-		break;
-	case TCPS_LAST_ACK:
-		/*
-		 * In this state we don't care about pending abort_rpl.
-		 * If we've sent abort_req it was post-close and was sent too
-		 * late, this close_con_rpl is the actual last message.
-		 */
-		t3_release_offload_resources(toep);
-		action = TCP_CLOSE;
-		break;
-	case TCPS_FIN_WAIT_1:
-		/*
-		 * If we can't receive any more
-		 * data, then closing user can proceed.
-		 * Starting the timer is contrary to the
-		 * specification, but if we don't get a FIN
-		 * we'll hang forever.
-		 *
-		 * XXXjl:
-		 * we should release the tp also, and use a
-		 * compressed state.
-		 */
-		if (so)
-			rcv = so_sockbuf_rcv(so);
-		else
-			break;
-		
-		if (rcv->sb_state & SBS_CANTRCVMORE) {
-			int timeout;
-
-			if (so)
-				soisdisconnected(so);
-			timeout = (tcp_fast_finwait2_recycle) ? 
-			    tcp_finwait2_timeout : tcp_maxidle;
-			tcp_timer_activate(tp, TT_2MSL, timeout);
-		}
-		tp->t_state = TCPS_FIN_WAIT_2;
-		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
-		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
-			action = TCP_DROP;
-		}
-
-		break;
-	default:
-		log(LOG_ERR,
-		       "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
-		       toep->tp_toedev->tod_name, toep->tp_tid,
-		       tp->t_state);
-	}
-	inp_wunlock(tp->t_inpcb);
-
-
-	if (action == TCP_TIMEWAIT) {
-		enter_timewait(tp);
-	} else if (action == TCP_DROP) {
-		tcp_offload_drop(tp, 0);		
-	} else if (action == TCP_CLOSE) {
-		tcp_offload_close(tp);		
-	}
-out:
 	m_freem(m);
-}
-
-/*
- * Handler for CLOSE_CON_RPL CPL messages.
- */
-static int
-do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
-			    void *ctx)
-{
-	struct toepcb *toep = (struct toepcb *)ctx;
-
-	process_close_con_rpl(toep, m);
 	return (0);
 }
 
 /*
- * Process abort replies.  We only process these messages if we anticipate
- * them as the coordination between SW and HW in this area is somewhat lacking
- * and sometimes we get ABORT_RPLs after we are done with the connection that
- * originated the ABORT_REQ.
+ * Handler for CLOSE_CON_RPL CPL messages.  peer ACK to our FIN received.
  */
-static void
-process_abort_rpl(struct toepcb *toep, struct mbuf *m)
+static int
+do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so;	
-	int needclose = 0;
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	const struct cpl_close_con_rpl *rpl = mtod(m, void *);
+	unsigned int tid = GET_TID(rpl);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp;
+	struct socket *so;
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+
+	CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid,
+	    tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags);
+
+	if ((toep->tp_flags & TP_ABORT_RPL_PENDING))
+		goto done;
+
+	so = inp_inpcbtosocket(inp);
+	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
+
+	switch (tp->t_state) {
+	case TCPS_CLOSING:
+		tcp_twstart(tp);
+release:
+		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+
+		INP_WLOCK(inp);
+		toepcb_release(toep);	/* no more CPLs expected */
 	
-#ifdef T3_TRACE
-	T3_TRACE1(TIDTB(sk),
-		  "process_abort_rpl: GTS rpl pending %d",
-		  sock_flag(sk, ABORT_RPL_PENDING));
-#endif
-	
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(tp->t_inpcb);
-	
-	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
-		/*
-		 * XXX panic on tcpdrop
-		 */
-		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
-			toep->tp_flags |= TP_ABORT_RPL_RCVD;
-		else {
-			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
-			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
-			    !is_t3a(toep->tp_toedev)) {
-				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
-					panic("TP_ABORT_REQ_RCVD set");
-				t3_release_offload_resources(toep);
-				needclose = 1;
-			}
-		}
+		m_freem(m);
+		return (0);
+	case TCPS_LAST_ACK:
+		if (tcp_close(tp))
+			INP_WUNLOCK(inp);
+		goto release;
+
+	case TCPS_FIN_WAIT_1:
+		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+			soisdisconnected(so);
+		tp->t_state = TCPS_FIN_WAIT_2;
+		break;
+	default:
+		log(LOG_ERR,
+		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
+		    __func__, toep->tp_tid, tp->t_state);
 	}
-	inp_wunlock(tp->t_inpcb);
 
-	if (needclose)
-		tcp_offload_close(tp);
+done:
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 
-	m_free(m);
+	m_freem(m);
+	return (0);
+}
+
+static int
+do_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct cpl_smt_write_rpl *rpl = mtod(m, void *);
+
+	if (rpl->status != CPL_ERR_NONE) {
+		log(LOG_ERR,
+		    "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
+		    rpl->status, GET_TID(rpl));
+	}
+
+	m_freem(m);
+	return (0);
+}
+
+static int
+do_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct cpl_set_tcb_rpl *rpl = mtod(m, void *);
+
+	if (rpl->status != CPL_ERR_NONE) {
+		log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n",
+		    rpl->status, GET_TID(rpl));
+	}
+
+	m_freem(m);
+	return (0);
 }
 
 /*
  * Handle an ABORT_RPL_RSS CPL message.
  */
 static int
-do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct cpl_abort_rpl_rss *rpl = cplhdr(m);
-	struct toepcb *toep;
-	
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
+	unsigned int tid = GET_TID(rpl);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct inpcb *inp;
+
 	/*
 	 * Ignore replies to post-close aborts indicating that the abort was
 	 * requested too late.  These connections are terminated when we get
@@ -2790,99 +1386,54 @@
 	 * arrives the TID is either no longer used or it has been recycled.
 	 */
 	if (rpl->status == CPL_ERR_ABORT_FAILED) {
-discard:
-		m_free(m);
+		m_freem(m);
 		return (0);
 	}
 
-	toep = (struct toepcb *)ctx;
-	
-        /*
-	 * Sometimes we've already closed the socket, e.g., a post-close
-	 * abort races with ABORT_REQ_RSS, the latter frees the socket
-	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
-	 * but FW turns the ABORT_REQ into a regular one and so we get
-	 * ABORT_RPL_RSS with status 0 and no socket.  Only on T3A.
-	 */
-	if (!toep)
-		goto discard;
+	/* Embryonic (syn queue) connections take a separate path. */
+	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
+		return (do_abort_rpl_synqe(qs, r, m));
 
-	if (toep->tp_tp == NULL) {
-		log(LOG_NOTICE, "removing tid for abort\n");
-		cxgb_remove_tid(cdev, toep, toep->tp_tid);
-		if (toep->tp_l2t) 
-			l2t_release(L2DATA(cdev), toep->tp_l2t);
+	CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep,
+	    rpl->status);
 
-		toepcb_release(toep);
-		goto discard;
+	inp = toep->tp_inp;
+	INP_WLOCK(inp);
+
+	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) {
+			/* First of the two expected replies; keep waiting. */
+			toep->tp_flags |= TP_ABORT_RPL_RCVD;
+			INP_WUNLOCK(inp);
+		} else {
+			toep->tp_flags &= ~TP_ABORT_RPL_RCVD;
+			toep->tp_flags &= TP_ABORT_RPL_PENDING;
+			/*
+			 * NOTE(review): the '&=' above has no '~', so it
+			 * clears every flag EXCEPT TP_ABORT_RPL_PENDING.
+			 * Presumably "&= ~TP_ABORT_RPL_PENDING" was intended
+			 * -- confirm against the upstream FreeBSD commit.
+			 */
+			toepcb_release(toep);	/* no more CPLs expected */
+		}
 	}
-	
-	log(LOG_NOTICE, "toep=%p\n", toep);
-	log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
 
-	toepcb_hold(toep);
-	process_abort_rpl(toep, m);
-	toepcb_release(toep);
+	m_freem(m);
 	return (0);
 }
 
 /*
- * Convert the status code of an ABORT_REQ into a FreeBSD error code.  Also
- * indicate whether RST should be sent in response.
+ * Convert the status code of an ABORT_REQ into a FreeBSD error code.
  */
 static int
-abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
+abort_status_to_errno(struct tcpcb *tp, int abort_reason)
 {
-	struct tcpcb *tp = so_sototcpcb(so);
-
 	switch (abort_reason) {
 	case CPL_ERR_BAD_SYN:
-#if 0		
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);	// fall through
-#endif		
+		/* FALLTHROUGH */
 	case CPL_ERR_CONN_RESET:
-		// XXX need to handle SYN_RECV due to crossed SYNs
 		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
 	case CPL_ERR_XMIT_TIMEDOUT:
 	case CPL_ERR_PERSIST_TIMEDOUT:
 	case CPL_ERR_FINWAIT2_TIMEDOUT:
 	case CPL_ERR_KEEPALIVE_TIMEDOUT:
-#if 0		
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
-#endif		
 		return (ETIMEDOUT);
 	default:
 		return (EIO);
 	}
 }
 
-static inline void
-set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
-{
-	struct cpl_abort_rpl *rpl = cplhdr(m);
-
-	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
-	rpl->wr.wr_lo = htonl(V_WR_TID(tid));
-	m->m_len = m->m_pkthdr.len = sizeof(*rpl);
-	
-	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
-	rpl->cmd = cmd;
-}
-
-static void
-send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
-{
-	struct mbuf *reply_mbuf;
-	struct cpl_abort_req_rss *req = cplhdr(m);
-
-	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
-	m_set_priority(m, CPL_PRIORITY_DATA);
-	m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
-	set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
-	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
-	m_free(m);
-}
-
 /*
  * Returns whether an ABORT_REQ_RSS message is a negative advice.
  */
@@ -2893,850 +1444,177 @@
 	    status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
-static void
-send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
+/*
+ * Build and transmit a CPL_ABORT_RPL for the given tid on the given queue
+ * set.  The reply always carries CPL_ABORT_NO_RST.
+ */
+void
+send_abort_rpl(struct toedev *tod, int tid, int qset)
 {
-	struct mbuf  *reply_mbuf;
-	struct cpl_abort_req_rss *req = cplhdr(m);
+	struct mbuf *reply;
+	struct cpl_abort_rpl *rpl;
+	struct adapter *sc = tod->tod_softc;
 
-	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+	reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl);
+	if (!reply)
+		CXGB_UNIMPLEMENTED();
+	/*
+	 * NOTE(review): M_GETHDR_OFLD() failure is treated as unimplemented
+	 * (presumably fatal) rather than deferred like the old code did --
+	 * confirm this is acceptable under memory pressure.
+	 */
 
-	if (!reply_mbuf) {
-		/* Defer the reply.  Stick rst_status into req->cmd. */
-		req->status = rst_status;
-		t3_defer_reply(m, tdev, send_deferred_abort_rpl);
-		return;
+	rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+	rpl->wr.wrh_lo = htonl(V_WR_TID(tid));
+	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+	rpl->cmd = CPL_ABORT_NO_RST;
+
+	t3_offload_tx(sc, reply);
+}
+
+/*
+ * Handle an ABORT_REQ_RSS CPL message.  If we're waiting for an ABORT_RPL we
+ * ignore this request except that we need to reply to it.
+ */
+static int
+do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	const struct cpl_abort_req_rss *req = mtod(m, void *);
+	unsigned int tid = GET_TID(req);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	struct socket *so;
+	int qset = toep->tp_qset;
+	/*
+	 * NOTE(review): toep from lookup_tid() is dereferenced without a NULL
+	 * check -- presumably a live tid always has a toepcb; confirm.
+	 */
+
+	/* Negative advice (e.g. retransmit threshold) is not a real abort. */
+	if (is_neg_adv_abort(req->status)) {
+		CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)",
+		    __func__, req->status, tid, toep->tp_flags);
+		m_freem(m);
+		return (0);
 	}
 
-	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
-	set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
-	m_free(m);
+	/* Embryonic (syn queue) connections take a separate path. */
+	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
+		return (do_abort_req_synqe(qs, r, m));
+
+	inp = toep->tp_inp;
+	INP_INFO_WLOCK(&V_tcbinfo);	/* for tcp_close */
+	INP_WLOCK(inp);
+
+	tp = intotcpcb(inp);
+	so = inp->inp_socket;
+
+	CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d",
+	    __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags,
+	    req->status);
+
+	/* First of the two expected abort requests; wait for the second. */
+	if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) {
+		toep->tp_flags |= TP_ABORT_REQ_RCVD;
+		toep->tp_flags |= TP_ABORT_SHUTDOWN;
+		INP_WUNLOCK(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		m_freem(m);
+		return (0);
+	}
+	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
 
 	/*
-	 * XXX need to sync with ARP as for SYN_RECV connections we can send
-	 * these messages while ARP is pending.  For other connection states
-	 * it's not a problem.
+	 * If we'd sent a reset on this toep, we'll ignore this and clean up in
+	 * the T3's reply to our reset instead.
 	 */
-	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
-}
+	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+		toep->tp_flags |= TP_ABORT_RPL_SENT;
+		INP_WUNLOCK(inp);
+	} else {
+		so_error_set(so, abort_status_to_errno(tp, req->status));
+		tp = tcp_close(tp);
+		if (tp == NULL)
+			INP_WLOCK(inp);	/* re-acquire */
+		toepcb_release(toep);	/* no more CPLs expected */
+	}
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 
-#ifdef notyet
-static void
-cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
-{
-	CXGB_UNIMPLEMENTED();
-#ifdef notyet	
-	struct request_sock *req = child->sk_user_data;
-
-	inet_csk_reqsk_queue_removed(parent, req);
-	synq_remove(tcp_sk(child));
-	__reqsk_free(req);
-	child->sk_user_data = NULL;
-#endif
-}
-
-
-/*
- * Performs the actual work to abort a SYN_RECV connection.
- */
-static void
-do_abort_syn_rcv(struct socket *child, struct socket *parent)
-{
-	struct tcpcb *parenttp = so_sototcpcb(parent);
-	struct tcpcb *childtp = so_sototcpcb(child);
-
-	/*
-	 * If the server is still open we clean up the child connection,
-	 * otherwise the server already did the clean up as it was purging
-	 * its SYN queue and the skb was just sitting in its backlog.
-	 */
-	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
-		cleanup_syn_rcv_conn(child, parent);
-		inp_wlock(childtp->t_inpcb);
-		t3_release_offload_resources(childtp->t_toe);
-		inp_wunlock(childtp->t_inpcb);
-		tcp_offload_close(childtp);
-	}
-}
-#endif
-
-/*
- * Handle abort requests for a SYN_RECV connection.  These need extra work
- * because the socket is on its parent's SYN queue.
- */
-static int
-abort_syn_rcv(struct socket *so, struct mbuf *m)
-{
-	CXGB_UNIMPLEMENTED();
-#ifdef notyet	
-	struct socket *parent;
-	struct toedev *tdev = toep->tp_toedev;
-	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
-	struct socket *oreq = so->so_incomp;
-	struct t3c_tid_entry *t3c_stid;
-	struct tid_info *t;
-
-	if (!oreq)
-		return -1;        /* somehow we are not on the SYN queue */
-
-	t = &(T3C_DATA(cdev))->tid_maps;
-	t3c_stid = lookup_stid(t, oreq->ts_recent);
-	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
-
-	so_lock(parent);
-	do_abort_syn_rcv(so, parent);
-	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
-	so_unlock(parent);
-#endif
+	send_abort_rpl(tod, tid, qset);
+	m_freem(m);
 	return (0);
 }
 
-/*
- * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
- * request except that we need to reply to it.
- */
 static void
-process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
+assign_rxopt(struct tcpcb *tp, uint16_t tcpopt)
 {
-	int rst_status = CPL_ABORT_NO_RST;
-	const struct cpl_abort_req_rss *req = cplhdr(m);
-	struct tcpcb *tp = toep->tp_tp; 
-	struct socket *so;
-	int needclose = 0;
-	
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
-	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
-		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
-		m_free(m);
-		goto skip;
+	struct toepcb *toep = tp->t_toe;
+	struct adapter *sc = toep->tp_tod->tod_softc;
+
+	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
+
+	if (G_TCPOPT_TSTAMP(tcpopt)) {
+		tp->t_flags |= TF_RCVD_TSTMP;
+		tp->t_flags |= TF_REQ_TSTMP;	/* forcibly set */
+		tp->ts_recent = 0;		/* XXX */
+		tp->ts_recent_age = tcp_ts_getticks();
+		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
 	}
 
-	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
-	/*
-	 * Three cases to consider:
-	 * a) We haven't sent an abort_req; close the connection.
-	 * b) We have sent a post-close abort_req that will get to TP too late
-	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
-	 *    be ignored and the connection should be closed now.
-	 * c) We have sent a regular abort_req that will get to TP too late.
-	 *    That will generate an abort_rpl with status 0, wait for it.
-	 */
-	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
-	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
-		int error;
-		
-		error = abort_status_to_errno(so, req->status,
-		    &rst_status);
-		so_error_set(so, error);
+	if (G_TCPOPT_SACK(tcpopt))
+		tp->t_flags |= TF_SACK_PERMIT;
+	else
+		tp->t_flags &= ~TF_SACK_PERMIT;
 
-		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
-			so_sorwakeup(so);
-		/*
-		 * SYN_RECV needs special processing.  If abort_syn_rcv()
-		 * returns 0 is has taken care of the abort.
-		 */
-		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
-			goto skip;
+	if (G_TCPOPT_WSCALE_OK(tcpopt))
+		tp->t_flags |= TF_RCVD_SCALE;
 
-		t3_release_offload_resources(toep);
-		needclose = 1;
+	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
+	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
+		tp->rcv_scale = tp->request_r_scale;
+		tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt);
 	}
-	inp_wunlock(tp->t_inpcb);
 
-	if (needclose)
-		tcp_offload_close(tp);
-
-	send_abort_rpl(m, tdev, rst_status);
-	return;
-skip:
-	inp_wunlock(tp->t_inpcb);	
 }
 
 /*
- * Handle an ABORT_REQ_RSS CPL message.
+ * The ISS and IRS are from after the exchange of SYNs and are off by 1.
  */
-static int
-do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+void
+make_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs,
+    uint16_t cpl_tcpopt)
 {
-	const struct cpl_abort_req_rss *req = cplhdr(m);
-	struct toepcb *toep = (struct toepcb *)ctx;
-	
-	if (is_neg_adv_abort(req->status)) {
-		m_free(m);
-		return (0);
-	}
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toepcb *toep = tp->t_toe;
+	long bufsize;
+	uint32_t iss = be32toh(cpl_iss) - 1;	/* true ISS */
+	uint32_t irs = be32toh(cpl_irs) - 1;	/* true IRS */
+	uint16_t tcpopt = be16toh(cpl_tcpopt);
 
-	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
-	
-	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
-		cxgb_remove_tid(cdev, toep, toep->tp_tid);
-		toep->tp_flags |= TP_ABORT_REQ_RCVD;
-		
-		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
-		if (toep->tp_l2t) 
-			l2t_release(L2DATA(cdev), toep->tp_l2t);
+	INP_WLOCK_ASSERT(inp);
 
-		/*
-		 *  Unhook
-		 */
-		toep->tp_tp->t_toe = NULL;
-		toep->tp_tp->t_flags &= ~TF_TOE;
-		toep->tp_tp = NULL;
-		/*
-		 * XXX need to call syncache_chkrst - but we don't
-		 * have a way of doing that yet
-		 */
-		toepcb_release(toep);
-		log(LOG_ERR, "abort for unestablished connection :-(\n");
-		return (0);
-	}
-	if (toep->tp_tp == NULL) {
-		log(LOG_NOTICE, "disconnected toepcb\n");
-		/* should be freed momentarily */
-		return (0);
-	}
+	tp->t_state = TCPS_ESTABLISHED;
+	tp->t_starttime = ticks;
+	TCPSTAT_INC(tcps_connects);
 
+	CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state],
+	    toep->tp_tid, toep, inp);
 
-	toepcb_hold(toep);
-	process_abort_req(toep, m, toep->tp_toedev);
-	toepcb_release(toep);
-	return (0);
-}
-#ifdef notyet
-static void
-pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
-{
-	struct toedev *tdev = TOE_DEV(parent);
-
-	do_abort_syn_rcv(child, parent);
-	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
-		struct cpl_pass_accept_rpl *rpl = cplhdr(m);
-
-		rpl->opt0h = htonl(F_TCAM_BYPASS);
-		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
-		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
-	} else
-		m_free(m);
-}
-#endif
-static void
-handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
-{
-	CXGB_UNIMPLEMENTED();
-	
-#ifdef notyet	
-	struct t3cdev *cdev;
-	struct socket *parent;
-	struct socket *oreq;
-	struct t3c_tid_entry *t3c_stid;
-	struct tid_info *t;
-	struct tcpcb *otp, *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	
-	/*
-	 * If the connection is being aborted due to the parent listening
-	 * socket going away there's nothing to do, the ABORT_REQ will close
-	 * the connection.
-	 */
-	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
-		m_free(m);
-		return;
-	}
-
-	oreq = so->so_incomp;
-	otp = so_sototcpcb(oreq);
-	
-	cdev = T3C_DEV(so);
-	t = &(T3C_DATA(cdev))->tid_maps;
-	t3c_stid = lookup_stid(t, otp->ts_recent);
-	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
-
-	so_lock(parent);
-	pass_open_abort(so, parent, m);
-	so_unlock(parent);
-#endif	
-}
-
-/*
- * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
- * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
- * connection.
- */
-static void
-pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
-{
-
-#ifdef notyet	
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
-#endif
-	handle_pass_open_arp_failure(m_get_socket(m), m);
-}
-
-/*
- * Populate a reject CPL_PASS_ACCEPT_RPL WR.
- */
-static void
-mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
-{
-	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
-	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
-	unsigned int tid = GET_TID(req);
-
-	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
-	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
-	rpl->peer_ip = req->peer_ip;   // req->peer_ip not overwritten yet
-	rpl->opt0h = htonl(F_TCAM_BYPASS);
-	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
-	rpl->opt2 = 0;
-	rpl->rsvd = rpl->opt2;   /* workaround for HW bug */
-}
-
-/*
- * Send a deferred reject to an accept request.
- */
-static void
-reject_pass_request(struct toedev *tdev, struct mbuf *m)
-{
-	struct mbuf *reply_mbuf;
-
-	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
-	mk_pass_accept_rpl(reply_mbuf, m);
-	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
-	m_free(m);
-}
-
-static void
-handle_syncache_event(int event, void *arg)
-{
-	struct toepcb *toep = arg;
-
-	switch (event) {
-	case TOE_SC_ENTRY_PRESENT:
-		/*
-		 * entry already exists - free toepcb
-		 * and l2t
-		 */
-		printf("syncache entry present\n");
-		toepcb_release(toep);
-		break;
-	case TOE_SC_DROP:
-		/*
-		 * The syncache has given up on this entry
-		 * either it timed out, or it was evicted
-		 * we need to explicitly release the tid
-		 */
-		printf("syncache entry dropped\n");
-		toepcb_release(toep);		
-		break;
-	default:
-		log(LOG_ERR, "unknown syncache event %d\n", event);
-		break;
-	}
-}
-
-static void
-syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
-{
-	struct in_conninfo inc;
-	struct toeopt toeo;
-	struct tcphdr th;
-	struct inpcb *inp;
-	int mss, wsf, sack, ts;
-	uint32_t rcv_isn = ntohl(req->rcv_isn);
-	
-	bzero(&toeo, sizeof(struct toeopt));
-	inp = so_sotoinpcb(lso);
-	
-	/*
-	 * Fill out information for entering us into the syncache
-	 */
-	bzero(&inc, sizeof(inc));
-	inc.inc_fport = th.th_sport = req->peer_port;
-	inc.inc_lport = th.th_dport = req->local_port;
-	th.th_seq = req->rcv_isn;
-	th.th_flags = TH_SYN;
-
-	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
-
-	inc.inc_len = 0;
-	inc.inc_faddr.s_addr = req->peer_ip;
-	inc.inc_laddr.s_addr = req->local_ip;
-
-	DPRINTF("syncache add of %d:%d %d:%d\n",
-	    ntohl(req->local_ip), ntohs(req->local_port),
-	    ntohl(req->peer_ip), ntohs(req->peer_port));
-	
-	mss = req->tcp_options.mss;
-	wsf = req->tcp_options.wsf;
-	ts = req->tcp_options.tstamp;
-	sack = req->tcp_options.sack;
-	toeo.to_mss = mss;
-	toeo.to_wscale = wsf;
-	toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
-	tcp_offload_syncache_add(&inc, &toeo, &th, inp, &lso, &cxgb_toe_usrreqs,
-toep);
-}
-
-
-/*
- * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
- * lock held.  Note that the sock here is a listening socket that is not owned
- * by the TOE.
- */
-static void
-process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
-    struct listen_ctx *lctx)
-{
-	int rt_flags;
-	struct l2t_entry *e;
-	struct iff_mac tim;
-	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
-	struct cpl_pass_accept_rpl *rpl;
-	struct cpl_pass_accept_req *req = cplhdr(m);
-	unsigned int tid = GET_TID(req);
-	struct tom_data *d = TOM_DATA(tdev);
-	struct t3cdev *cdev = d->cdev;
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *newtoep;
-	struct rtentry *dst;
-	struct sockaddr_in nam;
-	struct t3c_data *td = T3C_DATA(cdev);
-
-	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
-	if (__predict_false(reply_mbuf == NULL)) {
-		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
-			t3_defer_reply(m, tdev, reject_pass_request);
-		else {
-			cxgb_queue_tid_release(cdev, tid);
-			m_free(m);
-		}
-		DPRINTF("failed to get reply_mbuf\n");
-		
-		goto out;
-	}
-
-	if (tp->t_state != TCPS_LISTEN) {
-		DPRINTF("socket not in listen state\n");
-		
-		goto reject;
-	}
-	
-	tim.mac_addr = req->dst_mac;
-	tim.vlan_tag = ntohs(req->vlan_tag);
-	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
-		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
-		goto reject;
-	}
-	
-#ifdef notyet
-	/*
-	 * XXX do route lookup to confirm that we're still listening on this
-	 * address
-	 */
-	if (ip_route_input(skb, req->local_ip, req->peer_ip,
-			   G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
-		goto reject;
-	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
-		(RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
-	dst_release(skb->dst);	// done with the input route, release it
-	skb->dst = NULL;
-	
-	if ((rt_flags & RTF_LOCAL) == 0)
-		goto reject;
-#endif
-	/*
-	 * XXX
-	 */
-	rt_flags = RTF_LOCAL;
-	if ((rt_flags & RTF_LOCAL) == 0)
-		goto reject;
-	
-	/*
-	 * Calculate values and add to syncache
-	 */
-
-	newtoep = toepcb_alloc();
-	if (newtoep == NULL)
-		goto reject;
-
-	bzero(&nam, sizeof(struct sockaddr_in));
-	
-	nam.sin_len = sizeof(struct sockaddr_in);
-	nam.sin_family = AF_INET;
-	nam.sin_addr.s_addr =req->peer_ip;
-	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
-
-	if (dst == NULL) {
-		printf("failed to find route\n");
-		goto reject;
-	}
-	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
-	    (struct sockaddr *)&nam);
-	if (e == NULL) {
-		DPRINTF("failed to get l2t\n");
-	}
-	/*
-	 * Point to our listen socket until accept
-	 */
-	newtoep->tp_tp = tp;
-	newtoep->tp_flags = TP_SYN_RCVD;
-	newtoep->tp_tid = tid;
-	newtoep->tp_toedev = tdev;
-	tp->rcv_wnd = select_rcv_wnd(tdev, so);
-	
-	cxgb_insert_tid(cdev, d->client, newtoep, tid);
-	so_lock(so);
-	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
-	so_unlock(so);
-
-	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
-		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
-
-	if (newtoep->tp_ulp_mode) {
-		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
-		
-		if (ddp_mbuf == NULL)
-			newtoep->tp_ulp_mode = 0;
-	}
-	
-	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
-	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
-	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
-	/*
-	 * XXX workaround for lack of syncache drop
-	 */
-	toepcb_hold(newtoep);
-	syncache_add_accept_req(req, so, newtoep);
-	
-	rpl = cplhdr(reply_mbuf);
-	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
-	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	rpl->wr.wr_lo = 0;
-	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
-	rpl->opt2 = htonl(calc_opt2(so, tdev));
-	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
-	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten
-
-	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
-	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
-	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
-				  CPL_PASS_OPEN_ACCEPT);
-
-	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
-	
-	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
-		
-	l2t_send(cdev, reply_mbuf, e);
-	m_free(m);
-	if (newtoep->tp_ulp_mode) {	
-		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
-				V_TF_DDP_OFF(1) |
-				TP_DDP_TIMER_WORKAROUND_MASK,
-				V_TF_DDP_OFF(1) |
-		    TP_DDP_TIMER_WORKAROUND_VAL, 1);
-	} else
-		DPRINTF("no DDP\n");
-
-	return;
-reject:
-	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
-		mk_pass_accept_rpl(reply_mbuf, m);
-	else 
-		mk_tid_release(reply_mbuf, newtoep, tid);
-	cxgb_ofld_send(cdev, reply_mbuf);
-	m_free(m);
-out:
-#if 0
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-#else
-	return;
-#endif	
-}      
-
-/*
- * Handle a CPL_PASS_ACCEPT_REQ message.
- */
-static int
-do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
-	struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
-	struct tom_data *d = listen_ctx->tom_data;
-
-#if VALIDATE_TID
-	struct cpl_pass_accept_req *req = cplhdr(m);
-	unsigned int tid = GET_TID(req);
-	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
-
-	if (unlikely(!lsk)) {
-		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
-		       cdev->name,
-		       (unsigned long)((union listen_entry *)ctx -
-					t->stid_tab));
-		return CPL_RET_BUF_DONE;
-	}
-	if (unlikely(tid >= t->ntids)) {
-		printk(KERN_ERR "%s: passive open TID %u too large\n",
-		       cdev->name, tid);
-		return CPL_RET_BUF_DONE;
-	}
-	/*
-	 * For T3A the current user of the TID may have closed but its last
-	 * message(s) may have been backlogged so the TID appears to be still
-	 * in use.  Just take the TID away, the connection can close at its
-	 * own leisure.  For T3B this situation is a bug.
-	 */
-	if (!valid_new_tid(t, tid) &&
-	    cdev->type != T3A) {
-		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
-		       cdev->name, tid);
-		return CPL_RET_BUF_DONE;
-	}
-#endif
-
-	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
-	return (0);
-}
-
-/*
- * Called when a connection is established to translate the TCP options
- * reported by HW to FreeBSD's native format.
- */
-static void
-assign_rxopt(struct socket *so, unsigned int opt)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
-
-	inp_lock_assert(tp->t_inpcb);
-	
-	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
-	tp->t_flags         |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
-	tp->t_flags         |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
-	tp->t_flags 	    |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
-	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
-	    (TF_RCVD_SCALE|TF_REQ_SCALE))
-		tp->rcv_scale = tp->request_r_scale;
-}
-
-/*
- * Completes some final bits of initialization for just established connections
- * and changes their state to TCP_ESTABLISHED.
- *
- * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
- */
-static void
-make_established(struct socket *so, u32 snd_isn, unsigned int opt)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	
-	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
-	assign_rxopt(so, opt);
+	tp->irs = irs;
+	tcp_rcvseqinit(tp);
+	tp->rcv_wnd = toep->tp_rx_credits << 10;
+	tp->rcv_adv += tp->rcv_wnd;
+	tp->last_ack_sent = tp->rcv_nxt;
 
 	/*
-	 *XXXXXXXXXXX
-	 * 
+	 * If we were unable to send all rx credits via opt0, save the remainder
+	 * in rx_credits so that they can be handed over with the next credit
+	 * update.
 	 */
-#ifdef notyet
-	so->so_proto->pr_ctloutput = t3_ctloutput;
-#endif
-	
-#if 0	
-	inet_sk(sk)->id = tp->write_seq ^ jiffies;
-#endif	
-	/*
-	 * XXX not clear what rcv_wup maps to
-	 */
-	/*
-	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
-	 * pass through opt0.
-	 */
-	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
-		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
+	SOCKBUF_LOCK(&so->so_rcv);
+	bufsize = select_rcv_wnd(so);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	toep->tp_rx_credits = bufsize - tp->rcv_wnd;
 
-	dump_toepcb(toep);
+	tp->iss = iss;
+	tcp_sendseqinit(tp);
+	tp->snd_una = iss + 1;
+	tp->snd_nxt = iss + 1;
+	tp->snd_max = iss + 1;
 
-#ifdef notyet
-/*
- * no clean interface for marking ARP up to date
- */
-	dst_confirm(sk->sk_dst_cache);
-#endif
-	tp->t_starttime = ticks;
-	tp->t_state = TCPS_ESTABLISHED;
+	assign_rxopt(tp, tcpopt);
 	soisconnected(so);
 }
 
-static int
-syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
-{
-
-	struct in_conninfo inc;
-	struct toeopt toeo;
-	struct tcphdr th;
-	int mss, wsf, sack, ts;
-	struct mbuf *m = NULL;
-	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
-	unsigned int opt;
-	
-#ifdef MAC
-#error	"no MAC support"
-#endif	
-	
-	opt = ntohs(req->tcp_opt);
-	
-	bzero(&toeo, sizeof(struct toeopt));
-	
-	/*
-	 * Fill out information for entering us into the syncache
-	 */
-	bzero(&inc, sizeof(inc));
-	inc.inc_fport = th.th_sport = req->peer_port;
-	inc.inc_lport = th.th_dport = req->local_port;
-	th.th_seq = req->rcv_isn;
-	th.th_flags = TH_ACK;
-	
-	inc.inc_len = 0;
-	inc.inc_faddr.s_addr = req->peer_ip;
-	inc.inc_laddr.s_addr = req->local_ip;
-	
-	mss  = td->mtus[G_TCPOPT_MSS(opt)] - 40;
-	wsf  = G_TCPOPT_WSCALE_OK(opt);
-	ts   = G_TCPOPT_TSTAMP(opt);
-	sack = G_TCPOPT_SACK(opt);
-	
-	toeo.to_mss = mss;
-	toeo.to_wscale =  G_TCPOPT_SND_WSCALE(opt);
-	toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
-
-	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
-	    ntohl(req->local_ip), ntohs(req->local_port),
-	    ntohl(req->peer_ip), ntohs(req->peer_port),
-	    mss, wsf, ts, sack);
-	return tcp_offload_syncache_expand(&inc, &toeo, &th, so, m);
-}
-
-
-/*
- * Process a CPL_PASS_ESTABLISH message.  XXX a lot of the locking doesn't work
- * if we are in TCP_SYN_RECV due to crossed SYNs
- */
-static int
-do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct cpl_pass_establish *req = cplhdr(m);
-	struct toepcb *toep = (struct toepcb *)ctx;
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so, *lso;
-	struct t3c_data *td = T3C_DATA(cdev);
-	struct sockbuf *snd, *rcv;
-	
-	// Complete socket initialization now that we have the SND_ISN
-	
-	struct toedev *tdev;
-
-
-	tdev = toep->tp_toedev;
-
-	inp_wlock(tp->t_inpcb);
-	
-	/*
-	 *
-	 * XXX need to add reference while we're manipulating
-	 */
-	so = lso = inp_inpcbtosocket(tp->t_inpcb);
-
-	inp_wunlock(tp->t_inpcb);
-
-	so_lock(so);
-	LIST_REMOVE(toep, synq_entry);
-	so_unlock(so);
-	
-	if (!syncache_expand_establish_req(req, &so, toep)) {
-		/*
-		 * No entry 
-		 */
-		CXGB_UNIMPLEMENTED();
-	}
-	if (so == NULL) {
-		/*
-		 * Couldn't create the socket
-		 */
-		CXGB_UNIMPLEMENTED();
-	}
-
-	tp = so_sototcpcb(so);
-	inp_wlock(tp->t_inpcb);
-
-	snd = so_sockbuf_snd(so);
-	rcv = so_sockbuf_rcv(so);
-
-	snd->sb_flags |= SB_NOCOALESCE;
-	rcv->sb_flags |= SB_NOCOALESCE;
-
-	toep->tp_tp = tp;
-	toep->tp_flags = 0;
-	tp->t_toe = toep;
-	reset_wr_list(toep);
-	tp->rcv_wnd = select_rcv_wnd(tdev, so);
-	tp->rcv_nxt = toep->tp_copied_seq;
-	install_offload_ops(so);
-	
-	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
-	toep->tp_wr_unacked = 0;
-	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
-	toep->tp_qset_idx = 0;
-	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
-	
-	/*
-	 * XXX Cancel any keep alive timer
-	 */
-	     
-	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
-
-	/*
-	 * XXX workaround for lack of syncache drop
-	 */
-	toepcb_release(toep);
-	inp_wunlock(tp->t_inpcb);
-	
-	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
-	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
-#ifdef notyet
-	/*
-	 * XXX not sure how these checks map to us
-	 */
-	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
-		sk->sk_state_change(sk);
-		sk_wake_async(so, 0, POLL_OUT);
-	}
-	/*
-	 * The state for the new connection is now up to date.
-	 * Next check if we should add the connection to the parent's
-	 * accept queue.  When the parent closes it resets connections
-	 * on its SYN queue, so check if we are being reset.  If so we
-	 * don't need to do anything more, the coming ABORT_RPL will
-	 * destroy this socket.  Otherwise move the connection to the
-	 * accept queue.
-	 *
-	 * Note that we reset the synq before closing the server so if
-	 * we are not being reset the stid is still open.
-	 */
-	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
-		__kfree_skb(skb);
-		goto unlock;
-	}
-#endif
-	m_free(m);
-
-	return (0);
-}
-
 /*
  * Fill in the right TID for CPL messages waiting in the out-of-order queue
  * and send them to the TOE.
@@ -3745,48 +1623,70 @@
 fixup_and_send_ofo(struct toepcb *toep)
 {
 	struct mbuf *m;
-	struct toedev *tdev = toep->tp_toedev;
-	struct tcpcb *tp = toep->tp_tp;
+	struct toedev *tod = toep->tp_tod;
+	struct adapter *sc = tod->tod_softc;
+	struct inpcb *inp = toep->tp_inp;
 	unsigned int tid = toep->tp_tid;
 
-	log(LOG_NOTICE, "fixup_and_send_ofo\n");
-	
-	inp_lock_assert(tp->t_inpcb);
+	inp_lock_assert(inp);
+
 	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
+		struct ofld_hdr *oh = mtod(m, void *);
 		/*
 		 * A variety of messages can be waiting but the fields we'll
 		 * be touching are common to all so any message type will do.
 		 */
-		struct cpl_close_con_req *p = cplhdr(m);
+		struct cpl_close_con_req *p = (void *)(oh + 1);
 
-		p->wr.wr_lo = htonl(V_WR_TID(tid));
+		p->wr.wrh_lo = htonl(V_WR_TID(tid));
 		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
-		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
+		t3_offload_tx(sc, m);
 	}
 }
 
 /*
- * Updates socket state from an active establish CPL message.  Runs with the
- * socket lock held.
+ * Process a CPL_ACT_ESTABLISH message.
  */
-static void
-socket_act_establish(struct socket *so, struct mbuf *m)
+static int
+do_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct cpl_act_establish *req = cplhdr(m);
-	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	
-	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
-		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
-		    toep->tp_tid, tp->t_state);
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_act_establish *req = mtod(m, void *);
+	unsigned int tid = GET_TID(req);
+	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp;
+	struct socket *so; 
 
-	tp->ts_recent_age = ticks;
-	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
-	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
+	CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid);
 
-	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
-	
+	free_atid(&td->tid_maps, atid);
+
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+
+	KASSERT(toep->tp_qset == qs->idx,
+	    ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx));
+	KASSERT(toep->tp_tid == atid,
+	    ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid));
+
+	toep->tp_tid = tid;
+	insert_tid(td, toep, tid);
+
+	if (inp->inp_flags & INP_DROPPED) {
+		/* socket closed by the kernel before hw told us it connected */
+		send_reset(toep);
+		goto done;
+	}
+
+	KASSERT(tp->t_state == TCPS_SYN_SENT,
+	    ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state));
+
+	so = inp->inp_socket;
+	make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt);
+
 	/*
 	 * Now that we finally have a TID send any CPL messages that we had to
 	 * defer for lack of a TID.
@@ -3794,80 +1694,9 @@
 	if (mbufq_len(&toep->out_of_order_queue))
 		fixup_and_send_ofo(toep);
 
-	if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
-		/*
-		 * XXX does this even make sense?
-		 */
-		so_sorwakeup(so);
-	}
-	m_free(m);
-#ifdef notyet
-/*
- * XXX assume no write requests permitted while socket connection is
- * incomplete
- */
-	/*
-	 * Currently the send queue must be empty at this point because the
-	 * socket layer does not send anything before a connection is
-	 * established.  To be future proof though we handle the possibility
-	 * that there are pending buffers to send (either TX_DATA or
-	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
-	 * buffers according to the just learned write_seq, and then we send
-	 * them on their way.
-	 */
-	fixup_pending_writeq_buffers(sk);
-	if (t3_push_frames(so, 1))
-		sk->sk_write_space(sk);
-#endif
-
-	toep->tp_state = tp->t_state;
-	KMOD_TCPSTAT_INC(tcps_connects);
-				
-}
-
-/*
- * Process a CPL_ACT_ESTABLISH message.
- */
-static int
-do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct cpl_act_establish *req = cplhdr(m);
-	unsigned int tid = GET_TID(req);
-	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
-	struct toepcb *toep = (struct toepcb *)ctx;
-	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so; 
-	struct toedev *tdev;
-	struct tom_data *d;
-	
-	if (tp == NULL) {
-		free_atid(cdev, atid);
-		return (0);
-	}
-	inp_wlock(tp->t_inpcb);
-
-	/*
-	 * XXX
-	 */
-	so = inp_inpcbtosocket(tp->t_inpcb);
-	tdev = toep->tp_toedev; /* blow up here if link was down */
-	d = TOM_DATA(tdev);
-
-	/*
-	 * It's OK if the TID is currently in use, the owning socket may have
-	 * backlogged its last CPL message(s).  Just take it away.
-	 */
-	toep->tp_tid = tid;
-	toep->tp_tp = tp;
-	so_insert_tid(d, toep, tid);
-	free_atid(cdev, atid);
-	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
-
-	socket_act_establish(so, m);
-	inp_wunlock(tp->t_inpcb);
-	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
-	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
-
+done:
+	INP_WUNLOCK(inp);
+	m_freem(m);
 	return (0);
 }
 
@@ -3878,97 +1707,66 @@
 static void
 wr_ack(struct toepcb *toep, struct mbuf *m)
 {
-	struct tcpcb *tp = toep->tp_tp;
-	struct cpl_wr_ack *hdr = cplhdr(m);
+	struct inpcb *inp = toep->tp_inp;
+	struct tcpcb *tp;
+	struct cpl_wr_ack *hdr = mtod(m, void *);
 	struct socket *so;
 	unsigned int credits = ntohs(hdr->credits);
 	u32 snd_una = ntohl(hdr->snd_una);
 	int bytes = 0;
 	struct sockbuf *snd;
-	
-	CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
+	struct mbuf *p;
+	struct ofld_hdr *oh;
 
-	inp_wlock(tp->t_inpcb);
-	so = inp_inpcbtosocket(tp->t_inpcb);
+	inp_wlock(inp);
+	tp = intotcpcb(inp);
+	so = inp->inp_socket;
 	toep->tp_wr_avail += credits;
 	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
 		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
 
 	while (credits) {
-		struct mbuf *p = peek_wr(toep);
-		
+		p = peek_wr(toep);
+
 		if (__predict_false(!p)) {
+			CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, "
+			    "tid %u, state %u, wr_avail %u", __func__, credits,
+			    toep->tp_tid, tp->t_state, toep->tp_wr_avail);
+
 			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
 			    "nothing pending, state %u wr_avail=%u\n",
 			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
 			break;
 		}
-		CTR2(KTR_TOM,
-			"wr_ack: p->credits=%d p->bytes=%d",
-		    p->m_pkthdr.csum_data, p->m_pkthdr.len);
-		KASSERT(p->m_pkthdr.csum_data != 0,
-		    ("empty request still on list"));
 
-		if (__predict_false(credits < p->m_pkthdr.csum_data)) {
+		oh = mtod(p, struct ofld_hdr *);
 
-#if DEBUG_WR > 1
-			struct tx_data_wr *w = cplhdr(p);
-			log(LOG_ERR,
-			       "TID %u got %u WR credits, need %u, len %u, "
-			       "main body %u, frags %u, seq # %u, ACK una %u,"
-			       " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
-			       toep->tp_tid, credits, p->csum, p->len,
-			       p->len - p->data_len, skb_shinfo(p)->nr_frags,
-			       ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
-			    toep->tp_wr_avail, count_pending_wrs(tp) - credits);
-#endif
-			p->m_pkthdr.csum_data -= credits;
-			break;
-		} else {
-			dequeue_wr(toep);
-			credits -= p->m_pkthdr.csum_data;
-			bytes += p->m_pkthdr.len;
-			CTR3(KTR_TOM,
-			    "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
-			    p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
-	
-			m_free(p);
-		}
+		KASSERT(credits >= G_HDR_NDESC(oh->flags),
+		    ("%s: partial credits?  %d %d", __func__, credits,
+		    G_HDR_NDESC(oh->flags)));
+
+		dequeue_wr(toep);
+		credits -= G_HDR_NDESC(oh->flags);
+		bytes += oh->plen;
+
+		if (oh->flags & F_HDR_SGL)
+			sglist_free(oh->sgl);
+		m_freem(p);
 	}
 
-#if DEBUG_WR
-	check_wr_invariants(tp);
-#endif
-
-	if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
-#if VALIDATE_SEQ
-		struct tom_data *d = TOM_DATA(TOE_DEV(so));
-
-		log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK "
-		    "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
-		    toep->tp_tid, tp->snd_una);
-#endif
+	if (__predict_false(SEQ_LT(snd_una, tp->snd_una)))
 		goto out_free;
-	}
 
 	if (tp->snd_una != snd_una) {
 		tp->snd_una = snd_una;
-		tp->ts_recent_age = ticks;
-#ifdef notyet
-		/*
-		 * Keep ARP entry "minty fresh"
-		 */
-		dst_confirm(sk->sk_dst_cache);
-#endif
+		tp->ts_recent_age = tcp_ts_getticks();
 		if (tp->snd_una == tp->snd_nxt)
 			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
 	}
 
 	snd = so_sockbuf_snd(so);
 	if (bytes) {
-		CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
-		snd = so_sockbuf_snd(so);
-		sockbuf_lock(snd);		
+		SOCKBUF_LOCK(snd);
 		sbdrop_locked(snd, bytes);
 		so_sowwakeup_locked(so);
 	}
@@ -3978,142 +1776,25 @@
 
 out_free:
 	inp_wunlock(tp->t_inpcb);
-	m_free(m);
+	m_freem(m);
 }
 
 /*
  * Handler for TX_DATA_ACK CPL messages.
  */
 static int
-do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
+do_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
-	struct toepcb *toep = (struct toepcb *)ctx;
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_wr_ack *hdr = mtod(m, void *);
+	unsigned int tid = GET_TID(hdr);
+	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 
-	VALIDATE_SOCK(so);
+	/* XXX bad race */
+	if (toep)
+		wr_ack(toep, m);
 
-	wr_ack(toep, m);
-	return 0;
-}
-
-/*
- * Handler for TRACE_PKT CPL messages.  Just sink these packets.
- */
-static int
-do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
-{
-	m_freem(m);
-	return 0;
-}
-
-/*
- * Reset a connection that is on a listener's SYN queue or accept queue,
- * i.e., one that has not had a struct socket associated with it.
- * Must be called from process context.
- *
- * Modeled after code in inet_csk_listen_stop().
- */
-static void
-t3_reset_listen_child(struct socket *child)
-{
-	struct tcpcb *tp = so_sototcpcb(child);
-	
-	t3_send_reset(tp->t_toe);
-}
-
-
-static void
-t3_child_disconnect(struct socket *so, void *arg)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-		
-	if (tp->t_flags & TF_TOE) {
-		inp_wlock(tp->t_inpcb);
-		t3_reset_listen_child(so);
-		inp_wunlock(tp->t_inpcb);
-	}	
-}
-
-/*
- * Disconnect offloaded established but not yet accepted connections sitting
- * on a server's accept_queue.  We just send an ABORT_REQ at this point and
- * finish off the disconnect later as we may need to wait for the ABORT_RPL.
- */
-void
-t3_disconnect_acceptq(struct socket *listen_so)
-{
-
-	so_lock(listen_so);
-	so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
-	so_unlock(listen_so);
-}
-
-/*
- * Reset offloaded connections sitting on a server's syn queue.  As above
- * we send ABORT_REQ and finish off when we get ABORT_RPL.
- */
-
-void
-t3_reset_synq(struct listen_ctx *lctx)
-{
-	struct toepcb *toep;
-
-	so_lock(lctx->lso);	
-	while (!LIST_EMPTY(&lctx->synq_head)) {
-		toep = LIST_FIRST(&lctx->synq_head);
-		LIST_REMOVE(toep, synq_entry);
-		toep->tp_tp = NULL;
-		t3_send_reset(toep);
-		cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
-		toepcb_release(toep);
-	}
-	so_unlock(lctx->lso); 
-}
-
-
-int
-t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
-		   unsigned int nppods, unsigned int tag, unsigned int maxoff,
-		   unsigned int pg_off, unsigned int color)
-{
-	unsigned int i, j, pidx;
-	struct pagepod *p;
-	struct mbuf *m;
-	struct ulp_mem_io *req;
-	unsigned int tid = toep->tp_tid;
-	const struct tom_data *td = TOM_DATA(toep->tp_toedev);
-	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
-
-	CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
-	    gl, nppods, tag, maxoff, pg_off, color);
-	
-	for (i = 0; i < nppods; ++i) {
-		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
-		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-		req = mtod(m, struct ulp_mem_io *);
-		m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
-		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
-		req->wr.wr_lo = 0;
-		req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
-					   V_ULPTX_CMD(ULP_MEM_WRITE));
-		req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
-				 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
-
-		p = (struct pagepod *)(req + 1);
-		if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
-			p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
-			p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
-						  V_PPOD_COLOR(color));
-			p->pp_max_offset = htonl(maxoff);
-			p->pp_page_offset = htonl(pg_off);
-			p->pp_rsvd = 0;
-			for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
-				p->pp_addr[j] = pidx < gl->dgl_nelem ?
-				    htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
-		} else
-			p->pp_vld_tid = 0;   /* mark sentinel page pods invalid */
-		send_or_defer(toep, m, 0);
-		ppod_addr += PPOD_SIZE;
-	}
 	return (0);
 }
 
@@ -4153,10 +1834,7 @@
                      unsigned int word, uint64_t mask, uint64_t val)
 {
 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
-	
-	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
-	    tid, word, mask, val);
-	
+
 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
@@ -4167,294 +1845,19 @@
 	req->val = htobe64(val);
 }
 
-/*
- * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
- */
-static void
-mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
-    unsigned int tid, unsigned int credits)
+void
+t3_init_cpl_io(struct adapter *sc)
 {
-	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
-
-	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
-	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
-	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
-	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
-	    V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
-				 V_RX_CREDITS(credits));
+	t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
+	t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
+	t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify);
+	t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
+	t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack);
+	t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
+	t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
+	t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
+	t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
+	t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
+	t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
 }
-
-void
-t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
-{
-	unsigned int wrlen;
-	struct mbuf *m;
-	struct work_request_hdr *wr;
-	struct cpl_barrier *lock;
-	struct cpl_set_tcb_field *req;
-	struct cpl_get_tcb *getreq;
-	struct ddp_state *p = &toep->tp_ddp_state;
-
-#if 0
-	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
 #endif
-	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
-		sizeof(*getreq);
-	m = m_gethdr_nofail(wrlen);
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	wr = mtod(m, struct work_request_hdr *);
-	bzero(wr, wrlen);
-	
-	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
-	m->m_pkthdr.len = m->m_len = wrlen;
-
-	lock = (struct cpl_barrier *)(wr + 1);
-	mk_cpl_barrier_ulp(lock);
-
-	req = (struct cpl_set_tcb_field *)(lock + 1);
-
-	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
-
-	/* Hmmm, not sure if this actually a good thing: reactivating
-	 * the other buffer might be an issue if it has been completed
-	 * already. However, that is unlikely, since the fact that the UBUF
-	 * is not completed indicates that there is no oustanding data.
-	 */
-	if (bufidx == 0)
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
-				     V_TF_DDP_ACTIVE_BUF(1) |
-				     V_TF_DDP_BUF0_VALID(1),
-				     V_TF_DDP_ACTIVE_BUF(1));
-	else
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
-				     V_TF_DDP_ACTIVE_BUF(1) |
-				     V_TF_DDP_BUF1_VALID(1), 0);
-
-	getreq = (struct cpl_get_tcb *)(req + 1);
-	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
-
-	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
-
-	/* Keep track of the number of oustanding CPL_GET_TCB requests
-	 */
-	p->get_tcb_count++;
-	
-#ifdef T3_TRACE
-	T3_TRACE1(TIDTB(so),
-		  "t3_cancel_ddpbuf: bufidx %u", bufidx);
-#endif
-	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-}
-
-/**
- * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
- * @sk: the socket associated with the buffers
- * @bufidx: index of HW DDP buffer (0 or 1)
- * @tag0: new tag for HW buffer 0
- * @tag1: new tag for HW buffer 1
- * @len: new length for HW buf @bufidx
- *
- * Sends a compound WR to overlay a new DDP buffer on top of an existing
- * buffer by changing the buffer tag and length and setting the valid and
- * active flag accordingly.  The caller must ensure the new buffer is at
- * least as big as the existing one.  Since we typically reprogram both HW
- * buffers this function sets both tags for convenience. Read the TCB to
- * determine how made data was written into the buffer before the overlay
- * took place.
- */
-void
-t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
-	 	       unsigned int tag1, unsigned int len)
-{
-	unsigned int wrlen;
-	struct mbuf *m;
-	struct work_request_hdr *wr;
-	struct cpl_get_tcb *getreq;
-	struct cpl_set_tcb_field *req;
-	struct ddp_state *p = &toep->tp_ddp_state;
-
-	CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
-	    bufidx, tag0, tag1, len);
-#if 0
-	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
-#endif	
-	wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
-	m = m_gethdr_nofail(wrlen);
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	wr = mtod(m, struct work_request_hdr *);
-	m->m_pkthdr.len = m->m_len = wrlen;
-	bzero(wr, wrlen);
-
-	
-	/* Set the ATOMIC flag to make sure that TP processes the following
-	 * CPLs in an atomic manner and no wire segments can be interleaved.
-	 */
-	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
-	req = (struct cpl_set_tcb_field *)(wr + 1);
-	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
-			     V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
-			     V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
-			     V_TCB_RX_DDP_BUF0_TAG(tag0) |
-			     V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
-	req++;
-	if (bufidx == 0) {
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
-			    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
-			    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
-		req++;
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
-			    V_TF_DDP_PUSH_DISABLE_0(1) |
-			    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
-			    V_TF_DDP_PUSH_DISABLE_0(0) |
-			    V_TF_DDP_BUF0_VALID(1));
-	} else {
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
-			    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
-			    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
-		req++;
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
-			    V_TF_DDP_PUSH_DISABLE_1(1) |
-			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
-			    V_TF_DDP_PUSH_DISABLE_1(0) |
-			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
-	}
-
-	getreq = (struct cpl_get_tcb *)(req + 1);
-	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
-
-	/* Keep track of the number of oustanding CPL_GET_TCB requests
-	 */
-	p->get_tcb_count++;
-
-#ifdef T3_TRACE
-	T3_TRACE4(TIDTB(sk),
-		  "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
-		  "len %d",
-		  bufidx, tag0, tag1, len);
-#endif
-	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-}
-
-/*
- * Sends a compound WR containing all the CPL messages needed to program the
- * two HW DDP buffers, namely optionally setting up the length and offset of
- * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
- */
-void
-t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
-		      unsigned int len1, unsigned int offset1,
-                      uint64_t ddp_flags, uint64_t flag_mask, int modulate)
-{
-	unsigned int wrlen;
-	struct mbuf *m;
-	struct work_request_hdr *wr;
-	struct cpl_set_tcb_field *req;
-
-	CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
-	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
-	
-#if 0
-	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
-#endif
-	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
-		(len1 ? sizeof(*req) : 0) +
-		(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
-	m = m_gethdr_nofail(wrlen);
-	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-	wr = mtod(m, struct work_request_hdr *);
-	bzero(wr, wrlen);
-	
-	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
-	m->m_pkthdr.len = m->m_len = wrlen;
-
-	req = (struct cpl_set_tcb_field *)(wr + 1);
-	if (len0) {                  /* program buffer 0 offset and length */
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
-			V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
-			V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
-			V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
-			V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
-		req++;
-	}
-	if (len1) {                  /* program buffer 1 offset and length */
-		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
-			V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
-			V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
-			V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
-			V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
-		req++;
-	}
-
-	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
-			     ddp_flags);
-
-	if (modulate) {
-		mk_rx_data_ack_ulp(toep,
-		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
-		    toep->tp_copied_seq - toep->tp_rcv_wup);
-		toep->tp_rcv_wup = toep->tp_copied_seq;
-	}
-
-#ifdef T3_TRACE
-	T3_TRACE5(TIDTB(sk),
-		  "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
-		  "modulate %d",
-		  len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
-		  modulate);
-#endif
-
-	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-}
-
-void
-t3_init_wr_tab(unsigned int wr_len)
-{
-	int i;
-
-	if (mbuf_wrs[1])     /* already initialized */
-		return;
-
-	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
-		int sgl_len = (3 * i) / 2 + (i & 1);
-
-		sgl_len += 3;
-		mbuf_wrs[i] = sgl_len <= wr_len ?
-		       	1 : 1 + (sgl_len - 2) / (wr_len - 1);
-	}
-
-	wrlen = wr_len * 8;
-}
-
-int
-t3_init_cpl_io(void)
-{
-#ifdef notyet
-	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
-	if (!tcphdr_skb) {
-		log(LOG_ERR,
-		       "Chelsio TCP offload: can't allocate sk_buff\n");
-		return -1;
-	}
-	skb_put(tcphdr_skb, sizeof(struct tcphdr));
-	tcphdr_skb->h.raw = tcphdr_skb->data;
-	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
-#endif
-	
-	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
-	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
-	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
-	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
-	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
-	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
-	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
-	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
-	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
-	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
-	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
-	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
-	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
-	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
-	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
-	return (0);
-}
-
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1034 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/sockstate.h>
-#include <sys/sockopt.h>
-#include <sys/socket.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
-#include <sys/uio.h>
-#include <sys/file.h>
-
-#include <machine/bus.h>
-#include <machine/cpu.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-#include <ulp/tom/cxgb_tcp_offload.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-static int	(*pru_sosend)(struct socket *so, struct sockaddr *addr,
-    struct uio *uio, struct mbuf *top, struct mbuf *control,
-    int flags, struct thread *td);
-
-static int	(*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
-    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
-    int *flagsp);
-
-#define TMP_IOV_MAX 16
-#ifndef PG_FRAME
-#define PG_FRAME	~PAGE_MASK
-#endif
-#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
-
-void
-t3_init_socket_ops(void)
-{
-	struct protosw *prp;
-
-	prp = pffindtype(AF_INET, SOCK_STREAM);
-	pru_sosend = prp->pr_usrreqs->pru_sosend;
-	pru_soreceive = prp->pr_usrreqs->pru_soreceive;
-}
-
-struct cxgb_dma_info {
-	size_t			cdi_mapped;
-	int			cdi_nsegs;
-	bus_dma_segment_t	*cdi_segs;
-	
-};
-
-static void
-cxgb_dma_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
-    bus_size_t mapsize, int error)
-{
-	struct cxgb_dma_info *cdi = arg;
-	
-	cdi->cdi_mapped = mapsize;
-	cdi->cdi_nsegs = nsegs;
-	cdi->cdi_segs = segs;
-}
-
-static void
-iov_adj(struct iovec **iov, int *iovcnt, size_t count)
-{
-	struct iovec *iovtmp;
-	int iovcnttmp;
-	caddr_t ptmp;
-	
-	if (count > 0) {
-		iovtmp = *iov;
-		iovcnttmp = *iovcnt;
-		while (count > 0) {
-			if (count < iovtmp->iov_len) {
-				ptmp = iovtmp->iov_base;
-				ptmp += count; 
-				iovtmp->iov_base = ptmp;
-				iovtmp->iov_len -= count;
-				break;
-			} else 
-				count -= iovtmp->iov_len;
-			iovtmp++;
-			iovcnttmp--;
-		} 
-		*iov = iovtmp;
-		*iovcnt = iovcnttmp;
-	} else if (count < 0) {
-		iovtmp = &(*iov)[*iovcnt - 1];
-		iovcnttmp = *iovcnt;
-		while (count < 0) {
-			if (-count < iovtmp->iov_len) {
-				iovtmp->iov_len += count;
-				break;
-			} else
-				count += iovtmp->iov_len;
-			iovtmp--;
-			iovcnttmp--;
-		}
-		*iovcnt = iovcnttmp;
-	}
-}
-
-static void
-cxgb_zero_copy_free(void *cl, void *arg)
-{
-	struct mbuf_vec *mv;
-	struct mbuf *m = (struct mbuf *)cl;
-
-	mv = mtomv(m);
-	/*
-	 * Physical addresses, don't try to free should be unheld separately from sbdrop
-	 *
-	 */
-	mv->mv_count = 0;
-	m_free_iovec(m, m->m_type);
-}
-
-
-static int
-cxgb_hold_iovec_pages(struct uio *uio, vm_page_t *m, int *held, vm_prot_t prot)
-{
-	struct iovec *iov = uio->uio_iov;
-	int iovcnt = uio->uio_iovcnt;
-	int err, i, count, totcount, maxcount, totbytes, npages, curbytes;
-	uint64_t start, end;
-	vm_page_t *mp;
-	vm_map_t map;
-
-	map = &uio->uio_td->td_proc->p_vmspace->vm_map;
-	totbytes = totcount = 0;
-	maxcount = *held;
-
-	mp = m;
-	for (totcount = i = 0; (i < iovcnt) && (totcount < maxcount);  i++, iov++) {
-		count = maxcount - totcount;
-		    
-		start = (uintptr_t)iov->iov_base;
-		end = (uintptr_t)((caddr_t)iov->iov_base + iov->iov_len);
-		start &= PG_FRAME;
-		end += PAGE_MASK;
-		end &= PG_FRAME;
-		npages = (end - start) >> PAGE_SHIFT;
-		
-		count = min(count, npages);
-
-		/* The following return value is not used. XXX */
-		err = vm_fault_quick_hold_pages(map,
-		    (vm_offset_t)iov->iov_base, iov->iov_len, prot, mp, count);
-		mp += count;
-		totcount += count;
-		curbytes = iov->iov_len;
-		if (count != npages)
-			curbytes = count*PAGE_SIZE - (((uintptr_t)iov->iov_base)&PAGE_MASK);
-		totbytes += curbytes;
-	}
-	uio->uio_resid -= totbytes;
-
-	return (0);
-}
-
-/*
- * Returns whether a connection should enable DDP.  This happens when all of
- * the following conditions are met:
- * - the connection's ULP mode is DDP
- * - DDP is not already enabled
- * - the last receive was above the DDP threshold
- * - receive buffers are in user space
- * - receive side isn't shutdown (handled by caller)
- * - the connection's receive window is big enough so that sizable buffers
- *   can be posted without closing the window in the middle of DDP (checked
- *   when the connection is offloaded)
- */
-static int
-so_should_ddp(const struct toepcb *toep, int last_recv_len)
-{
-
-	DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
-	    toep->tp_ulp_mode, last_recv_len,  TOM_TUNABLE(toep->tp_toedev, ddp_thres),
-	    toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
-
-	return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
-	       last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
-	       toep->tp_tp->rcv_wnd > 
-	           (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN);
-}
-
-static inline int
-is_ddp(const struct mbuf *m)
-{
-	return ((m->m_flags & M_DDP) != 0);
-}
-
-static inline int
-is_ddp_psh(const struct mbuf *m)
-{
-        return ((is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH)) != 0);
-}
-
-static int
-m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
-{
-	int curlen, startlen, resid_init, err = 0;
-	caddr_t buf;
-
-	DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
-	    m, offset, len);
-
-	startlen = len;
-	resid_init = uio->uio_resid;
-	while (m && len) {
-		buf = mtod(m, caddr_t);
-		curlen = m->m_len;
-		if (offset && (offset < curlen)) {
-			curlen -= offset;
-			buf += offset;
-			offset = 0;
-		} else if (offset) {
-			offset -= curlen;
-			m = m->m_next;
-			continue;
-		}
-		err = uiomove(buf, min(len, curlen), uio);
-		if (err) {
-			printf("uiomove returned %d\n", err);
-			return (err);
-		}
-		
-		len -= min(len, curlen);
-		m = m->m_next;
-	}
-	DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
-	    startlen - len, resid_init, uio->uio_resid);
-	return (err);
-}
-
-/*
- * Copy data from an sk_buff to an iovec.  Deals with RX_DATA, which carry the
- * data in the sk_buff body, and with RX_DATA_DDP, which place the data in a
- * DDP buffer.
- */
-static inline int
-copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
-{
-	struct iovec *to = uio->uio_iov;
-	int err;
-	
-	if (__predict_true(!is_ddp(m)))                              /* RX_DATA */
-		return m_uiomove(m, offset, len, uio);
-	if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
-		to->iov_len -= len;
-		to->iov_base = ((caddr_t)to->iov_base) + len;
-		uio->uio_iov = to;
-		uio->uio_resid -= len;
-		return (0);
-	}
-	err = t3_ddp_copy(m, offset, uio, len);             /* kernel DDP */
-	return (err);
-}
-
-static void
-cxgb_wait_dma_completion(struct toepcb *toep)
-{
-	struct rwlock *lock;
-	
-	lock = &toep->tp_tp->t_inpcb->inp_lock;
-	inp_wlock(toep->tp_tp->t_inpcb);
-	cv_wait_unlock(&toep->tp_cv, lock);
-}
-
-static int
-cxgb_vm_page_to_miov(struct toepcb *toep, struct uio *uio, struct mbuf **m)
-{
-	int i, seg_count, err, type;
-	struct mbuf *m0;
-	struct cxgb_dma_info cdi;
-	struct mbuf_vec *mv;
-	struct mbuf_iovec *mi;
-	bus_dma_segment_t *segs;
-	
-	err = bus_dmamap_load_uio(toep->tp_tx_dmat, toep->tp_dmamap, uio,
-	    cxgb_dma_callback, &cdi, 0);
-
-	if (err)
-		return (err);
-	seg_count = cdi.cdi_nsegs;	
-	if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
-		bus_dmamap_unload(toep->tp_tx_dmat, toep->tp_dmamap);
-		return (ENOMEM);
-	}
-	segs = cdi.cdi_segs;
-	m0->m_type = type;
-	m0->m_flags = (M_EXT|M_NOFREE);
-	m0->m_ext.ext_type = EXT_EXTREF;
-	m0->m_ext.ext_free = cxgb_zero_copy_free;
-#if __FreeBSD_version >= 800016
-	m0->m_ext.ext_arg1 = NULL;	/* XXX: probably wrong /phk */
-	m0->m_ext.ext_arg2 = NULL;
-#else
-	m0->m_ext.ext_args = NULL;
-#endif
-    
-	mv = mtomv(m0);
-	mv->mv_count = seg_count;
-	mv->mv_first = 0;
-	for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++)
-		mi_collapse_sge(mi, segs);
-
-	*m = m0;
-
-	/*
-	 * This appears to be a no-op at the moment
-	 * as busdma is all or nothing need to make
-	 * sure the tag values are large enough
-	 *
-	 */
-	if (cdi.cdi_mapped < uio->uio_resid) {
-		uio->uio_resid -= cdi.cdi_mapped;
-	} else
-		uio->uio_resid = 0;
-
-	return (0);
-}
-
-static int
-t3_sosend(struct socket *so, struct uio *uio)
-{
-	int rv, count, hold_resid, sent, iovcnt;
-	struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	struct mbuf *m;
-	struct uio uiotmp;
-	struct sockbuf *snd;
-	
-	/*
-	 * Events requiring iteration:
-	 *  - number of pages exceeds max hold pages for process or system
-	 *  - number of pages exceeds maximum sg entries for a single WR
-	 *
-	 * We're limited to holding 128 pages at once - and we're limited to
-	 * 34 SG entries per work request, but each SG entry can be any number 
-	 * of contiguous pages
-	 *
-	 */
-
-	uiotmp = *uio;
-	iovcnt = uio->uio_iovcnt;
-	iov = uio->uio_iov;
-	sent = 0;
-	snd = so_sockbuf_snd(so);
-sendmore:
-	/*
-	 * Make sure we don't exceed the socket buffer
-	 */
-	count = min(toep->tp_page_count, (sockbuf_sbspace(snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
-	rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, VM_PROT_READ);
-	hold_resid = uiotmp.uio_resid;
-	if (rv)
-		return (rv);
-
-	/*
-	 * Bump past sent and shave off the unheld amount
-	 */
-	if (hold_resid  > 0) {
-		iovtmpp = iovtmp;
-		memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
-		if (sent)
-			iov_adj(&iovtmpp, &iovcnt, sent);
-		iov_adj(&iovtmpp, &iovcnt, -hold_resid);
-		uiotmp.uio_iov = iovtmpp;
-		uiotmp.uio_iovcnt = iovcnt;
-
-	}
-	uiotmp.uio_resid = uio->uio_resid - hold_resid;
-	
-	/*
-	 * Push off all held pages
-	 *
-	 */
-	while (uiotmp.uio_resid > 0) {
-		rv = cxgb_vm_page_to_miov(toep, &uiotmp, &m);
-		if (rv) {
-			vm_page_unhold_pages(toep->tp_pages, count);
-			return (rv);
-		}
-		uio->uio_resid -= m->m_pkthdr.len;
-		sent += m->m_pkthdr.len;
-		sbappend(snd, m);
-		t3_push_frames(so, TRUE);
-		iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
-	}
-
-	/*
-	 * Wait for pending I/O to be DMA'd to the card 
-	 * 
-	 */
-	cxgb_wait_dma_completion(toep);
-	vm_page_unhold_pages(toep->tp_pages, count);
-	/*
-	 * If there is more data to send adjust local copy of iov
-	 * to point to teh start
-	 */
-	if (hold_resid) {
-		iovtmpp = iovtmp;
-		memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
-		iov_adj(&iovtmpp, &iovcnt, sent);
-		uiotmp = *uio;
-		uiotmp.uio_iov = iovtmpp;
-		uiotmp.uio_iovcnt = iovcnt;
-		goto sendmore;
-	}
-
-	return (0);
-}
-
-static int
-cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
-    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toedev *tdev; 
-	int zcopy_thres, zcopy_enabled, rv;
-
-	/*
-	 * In order to use DMA direct from userspace the following
-	 * conditions must be met:
-	 *  - the connection is currently offloaded
-	 *  - ddp is enabled
-	 *  - the number of bytes to be transferred exceeds the threshold
-	 *  - the number of bytes currently in flight won't exceed the in-flight
-	 *    threshold XXX TODO
-	 *  - vm_fault_quick_hold_pages succeeds
-	 *  - blocking socket XXX for now
-	 *
-	 */
-	if (tp && tp->t_flags & TF_TOE) {
-		struct toepcb *toep = tp->t_toe;
-		
-		tdev = toep->tp_toedev;
-		zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
-		zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
-
-		if (uio && (uio->uio_resid > zcopy_thres) &&
-		    (uio->uio_iovcnt < TMP_IOV_MAX) &&  ((so_state_get(so) & SS_NBIO) == 0)
-		    && zcopy_enabled) {
-			rv = t3_sosend(so, uio);
-			if (rv != EAGAIN)
-				return (rv);
-		}
-	}
-	return pru_sosend(so, addr, uio, top, control, flags, td);
-}
-
-/*
- * Following replacement or removal of the first mbuf on the first mbuf chain
- * of a socket buffer, push necessary state changes back into the socket
- * buffer so that other consumers see the values consistently.  'nextrecord'
- * is the callers locally stored value of the original value of
- * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
- * NOTE: 'nextrecord' may be NULL.
- */
-static __inline void
-sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
-{
-	sockbuf_lock_assert(sb);
-	/*
-	 * First, update for the new value of nextrecord.  If necessary, make
-	 * it the first record.
-	 */
-	if (sb->sb_mb != NULL)
-		sb->sb_mb->m_nextpkt = nextrecord;
-	else
-		sb->sb_mb = nextrecord;
-
-        /*
-         * Now update any dependent socket buffer fields to reflect the new
-         * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
-	 * addition of a second clause that takes care of the case where
-	 * sb_mb has been updated, but remains the last record.
-         */
-        if (sb->sb_mb == NULL) {
-                sb->sb_mbtail = NULL;
-                sb->sb_lastrecord = NULL;
-        } else if (sb->sb_mb->m_nextpkt == NULL)
-                sb->sb_lastrecord = sb->sb_mb;
-}
-
-#define IS_NONBLOCKING(so)	(so_state_get(so) & SS_NBIO)
-
-static int
-t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
-{
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	struct mbuf *m;
-	uint32_t offset;
-	int err, flags, avail, len, copied, copied_unacked;
-	int target;		/* Read at least this many bytes */
-	int user_ddp_ok;
-	struct ddp_state *p;
-	struct inpcb *inp = so_sotoinpcb(so);
-	int socket_state, socket_error;
-	struct sockbuf *rcv;
-	
-	avail = offset = copied = copied_unacked = 0;
-	flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
-	rcv = so_sockbuf_rcv(so);
-	
-	err = sblock(rcv, SBLOCKWAIT(flags));
-	p = &toep->tp_ddp_state;
-
-	if (err)
-		return (err);
-
-	rcv = so_sockbuf_rcv(so);
-	sockbuf_lock(rcv);
-	if ((tp->t_flags & TF_TOE) == 0) {
-		sockbuf_unlock(rcv);
-		err = EAGAIN;
-		goto done_unlocked;
-	}
-	
-	p->user_ddp_pending = 0;
-restart:
-	if ((tp->t_flags & TF_TOE) == 0) {
-		sockbuf_unlock(rcv);
-		err = EAGAIN;
-		goto done_unlocked;
-	}
-
-	len = uio->uio_resid;
-	m = rcv->sb_mb;
-	target = (flags & MSG_WAITALL) ? len : rcv->sb_lowat;
-	user_ddp_ok = p->ubuf_ddp_ready;
-	p->cancel_ubuf = 0;
-	
-	if (len == 0)
-		goto done;
-	if (m) 
-		goto got_mbuf;
-
-	/* empty receive queue */
-	if (copied >= target && (rcv->sb_mb == NULL) &&
-	    !p->user_ddp_pending)
-		goto done;
-
-	socket_state = so_state_get(so);
-	socket_error = so_error_get(so);
-	rcv = so_sockbuf_rcv(so);
-	
-	if (copied) {
-		if (socket_error || tp->t_state == TCPS_CLOSED || 
-		    (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)))
-			goto done;
-	} else {
-		if (socket_state & SS_NOFDREF)
-			goto done;
-		if (socket_error) {
-			err = socket_error;
-			socket_error = 0;
-			goto done;
-		}
-		if (rcv->sb_state & SBS_CANTRCVMORE) 
-			goto done;
-		if (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
-			goto done;
-		if (tp->t_state == TCPS_CLOSED) {
-			err = ENOTCONN; 
-			goto done;
-		}
-	}
-	if (rcv->sb_mb && !p->user_ddp_pending) {
-		sockbuf_unlock(rcv);
-		inp_wlock(inp);
-		t3_cleanup_rbuf(tp, copied_unacked);
-		inp_wunlock(inp);
-		sockbuf_lock(rcv);
-		copied_unacked = 0;
-		goto restart;
-	}
-	if (p->kbuf[0] && user_ddp_ok && !p->user_ddp_pending && 
-	    uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
-	    p->ubuf_ddp_ready) {
-		p->user_ddp_pending =
-		    !t3_overlay_ubuf(toep, rcv, uio,
-			IS_NONBLOCKING(so), flags, 1, 1);
-		if (p->user_ddp_pending) {
-			p->kbuf_posted++;
-			user_ddp_ok = 0;
-		}
-	}
-	if (p->kbuf[0] && (p->kbuf_posted == 0)) {
-		t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
-		p->kbuf_posted++;
-	}
-	if (p->user_ddp_pending) {
-		/* One shot at DDP if we already have enough data */
-		if (copied >= target)
-			user_ddp_ok = 0;
-
-		if (rcv->sb_state & SBS_CANTRCVMORE) 
-			goto done;
-		CTR0(KTR_TOM, "ddp pending -- waiting");
-		if ((err = sbwait(rcv)) != 0)
-			goto done;
-//for timers to work			await_ddp_completion(sk, flags, &timeo);
-	} else if (copied >= target)
-		goto done;
-	else {
-		if (copied_unacked) {
-			int i = 0;
-
-			sockbuf_unlock(rcv);
-			inp_wlock(inp);
-			t3_cleanup_rbuf(tp, copied_unacked);
-			inp_wunlock(inp);
-			copied_unacked = 0;
-			if (mp_ncpus > 1)
-				while (i++ < 200 && rcv->sb_mb == NULL)
-					cpu_spinwait();
-			sockbuf_lock(rcv);
-		}
-		if (rcv->sb_mb)
-			goto restart;
-
-		if (rcv->sb_state & SBS_CANTRCVMORE)
-			goto done;
-
-		CTR0(KTR_TOM, "no buffers -- waiting");
-
-		if ((err = sbwait(rcv)) != 0) 
-			goto done;
-	}
-     	goto restart;
-got_mbuf:
-	/*
-	 * Adjust the mbuf seqno if it has already been partially processed by
-	 * soreceive_generic
-	 */
-	if (m->m_pkthdr.len != m->m_len) {
-		m->m_seq += m->m_pkthdr.len - m->m_len;
-		m->m_pkthdr.len = m->m_len;
-	}
-	    
-	CTR6(KTR_TOM, "t3_soreceive: ddp_flags=0x%x m_len=%u resid=%u "
-	    "m_seq=0x%08x c_seq=0x%08x c_unack=%u",
-	    (is_ddp(m) ? m->m_ddp_flags : 0), m->m_pkthdr.len, len,
-	    m->m_seq, toep->tp_copied_seq, copied_unacked);
-	KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT),
-	    ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT),
-		m->m_ext.ext_type, m->m_len, m->m_pkthdr.len));
-	KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p"
-		" m_flags=0x%x m->m_len=%d", m->m_next, m->m_nextpkt, m->m_flags, m->m_len));
-	if (m->m_pkthdr.len == 0) {
-		if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
-			panic("empty mbuf and NOCOPY not set\n");
-		CTR0(KTR_TOM, "ddp done notification");
-		p->user_ddp_pending = 0;
-		sbdroprecord_locked(rcv);
-		goto done;
-	}
-
-	KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0,
-	    ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x",
-		offset, toep->tp_copied_seq, copied_unacked, m->m_seq));
-	offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
-	
-	if (offset >= m->m_pkthdr.len)
-		panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x "
-		    "seq 0x%x pktlen %d ddp flags 0x%x", offset,
-		    toep->tp_copied_seq + copied_unacked, m->m_seq,
-		    m->m_pkthdr.len, m->m_ddp_flags);
-
-	avail = m->m_pkthdr.len - offset;
-	if (len < avail) {
-		if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY)) 
-			panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset);
-		avail = len;
-		rcv->sb_flags |= SB_IN_TOE;
-	} else if (p->kbuf_posted == 0 && p->user_ddp_pending == 0)
-		rcv->sb_flags &= ~SB_IN_TOE;
-		
-#ifdef URGENT_DATA_SUPPORTED
-	/*
-	 * Check if the data we are preparing to copy contains urgent
-	 * data.  Either stop short of urgent data or skip it if it's
-	 * first and we are not delivering urgent data inline.
-	 */
-	if (__predict_false(toep->tp_urg_data)) {
-		uint32_t urg_offset = tp->rcv_up - tp->copied_seq + copied_unacked;
-		
-		if (urg_offset < avail) {
-			if (urg_offset) {
-				/* stop short of the urgent data */
-				avail = urg_offset;
-			} else if ((so_options_get(so) & SO_OOBINLINE) == 0) {
-				/* First byte is urgent, skip */
-				toep->tp_copied_seq++;
-				offset++;
-				avail--;
-				if (!avail)
-					goto skip_copy;
-			}	
-		}	
-	}	
-#endif
-	if (is_ddp_psh(m) || offset || (rcv->sb_mb && !is_ddp(m))) {
-		user_ddp_ok = 0;
-#ifdef T3_TRACE	
-		T3_TRACE0(TIDTB(so), "t3_sosend: PSH");
-#endif	
-	}
-	
-	if (user_ddp_ok && !p->user_ddp_pending &&
-	    uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
-	    p->ubuf_ddp_ready) {
-		p->user_ddp_pending = 
-		    !t3_overlay_ubuf(toep, rcv, uio,
-			IS_NONBLOCKING(so), flags, 1, 1);
-		if (p->user_ddp_pending) {
-			p->kbuf_posted++;
-			user_ddp_ok = 0;
-		}
-		DPRINTF("user_ddp_pending=%d\n", p->user_ddp_pending);
-	} else
-		DPRINTF("user_ddp_ok=%d user_ddp_pending=%d iov_len=%ld dgl_length=%d ubuf_ddp_ready=%d ulp_mode=%d is_ddp(m)=%d flags=0x%x ubuf=%p kbuf_posted=%d\n",
-		    user_ddp_ok, p->user_ddp_pending, uio->uio_iov->iov_len, p->kbuf[0] ? p->kbuf[0]->dgl_length : 0,
-		    p->ubuf_ddp_ready, toep->tp_ulp_mode, !!is_ddp(m), m->m_ddp_flags, p->ubuf, p->kbuf_posted);
-	
-	/*
-	 * If MSG_TRUNC is specified the data is discarded.
-	 * XXX need to check pr_atomic
-	 */
-	KASSERT(avail > 0, ("avail=%d resid=%d offset=%d", avail,  uio->uio_resid, offset));
-	if (__predict_true(!(flags & MSG_TRUNC))) {
-		int resid = uio->uio_resid;
-		
-		sockbuf_unlock(rcv);
-		if ((err = copy_data(m, offset, avail, uio))) {
-			if (err)
-				err = EFAULT;
-			goto done_unlocked;
-		}
-			    
-		sockbuf_lock(rcv);
-		if (avail != (resid - uio->uio_resid))
-			printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
-			    avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
-
-		if ((tp->t_flags & TF_TOE) == 0) {
-			sockbuf_unlock(rcv);
-			err = EAGAIN;
-			goto done_unlocked;
-		}
-	}
-	
-	copied += avail;
-	copied_unacked += avail;
-	len -= avail;
-	
-#ifdef URGENT_DATA_SUPPORTED
-skip_copy:
-	if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
-		tp->urg_data = 0;
-#endif
-	/*
-	 * If the buffer is fully consumed free it.  If it's a DDP
-	 * buffer also handle any events it indicates.
-	 */
-	if (avail + offset >= m->m_pkthdr.len) {
-		unsigned int fl = m->m_ddp_flags;
-		int exitnow, got_psh = 0, nomoredata = 0;
-		int count;
-		struct mbuf *nextrecord;
-
-		if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
-			if (is_ddp_psh(m) && p->user_ddp_pending)
-				got_psh = 1;
-			
-			if (fl & DDP_BF_NOCOPY)
-				p->user_ddp_pending = 0;
-			else if ((fl & DDP_BF_NODATA) && IS_NONBLOCKING(so)) {
-				p->kbuf_posted--;
-				nomoredata = 1;
-			} else {
-				p->kbuf_posted--;
-				p->ubuf_ddp_ready = 1;
-			}
-		}
-
-		nextrecord = m->m_nextpkt;
-		count = m->m_pkthdr.len;
-		while (count > 0) {
-			count -= m->m_len;
-			KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
-			CTR2(KTR_TOM, "freeing mbuf m_len = %d pktlen = %d", m->m_len, m->m_pkthdr.len);
-			sbfree(rcv, m);
-			rcv->sb_mb = m_free(m);
-			m = rcv->sb_mb;
-		}
-		sockbuf_pushsync(rcv, nextrecord);
-#if 0
-		sbdrop_locked(rcv, m->m_pkthdr.len);
-#endif		
-		exitnow = got_psh || nomoredata;
-		if  (copied >= target && (rcv->sb_mb == NULL) && exitnow)
-			goto done;
-		if (copied_unacked > (rcv->sb_hiwat >> 2)) {
-			sockbuf_unlock(rcv);
-			inp_wlock(inp);
-			t3_cleanup_rbuf(tp, copied_unacked);
-			inp_wunlock(inp);
-			copied_unacked = 0;
-			sockbuf_lock(rcv);
-		}
-	} 
-	if (len > 0)
-		goto restart;
-
-	done:
-	if ((tp->t_flags & TF_TOE) == 0) {
-		sockbuf_unlock(rcv);
-		err = EAGAIN;
-		goto done_unlocked;
-	}
-	/*
-	 * If we can still receive decide what to do in preparation for the
-	 * next receive.  Note that RCV_SHUTDOWN is set if the connection
-	 * transitioned to CLOSE but not if it was in that state to begin with.
-	 */
-	if (__predict_true((so_state_get(so) & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
-		if (p->user_ddp_pending) {
-			user_ddp_ok = 0;
-			t3_cancel_ubuf(toep, rcv);
-			if (rcv->sb_mb) {
-				if (copied < 0)
-					copied = 0;
-				if (len > 0)
-					goto restart;
-			}
-			p->user_ddp_pending = 0;
-		}
-		if ((p->kbuf[0] != NULL) && (p->kbuf_posted == 0)) {
-#ifdef T3_TRACE
-			T3_TRACE0(TIDTB(so),
-			  "chelsio_recvmsg: about to exit, repost kbuf");
-#endif
-
-			t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
-			p->kbuf_posted++;
-		} else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) {
-			CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid);
-			if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev,
-				    ddp_copy_limit), 0, IS_NONBLOCKING(so))) {
-				rcv->sb_flags |= SB_IN_TOE;
-				p->kbuf_posted = 1;
-			}
-			
-		}
-	}
-#ifdef T3_TRACE
-	T3_TRACE5(TIDTB(so),
-		  "chelsio_recvmsg <-: copied %d len %d buffers_freed %d "
-		  "kbuf_posted %d user_ddp_pending %u",
-		  copied, len, buffers_freed, p ? p->kbuf_posted : -1, 
-	    p->user_ddp_pending);
-#endif
-	sockbuf_unlock(rcv);
-done_unlocked:	
-	if (copied_unacked && (tp->t_flags & TF_TOE)) {
-		inp_wlock(inp);
-		t3_cleanup_rbuf(tp, copied_unacked);
-		inp_wunlock(inp);
-	}
-	sbunlock(rcv);
-
-	return (err);
-}
-
-static int
-cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
-    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
-{
-	struct toedev *tdev;
-	int rv, zcopy_thres, zcopy_enabled, flags;
-	struct tcpcb *tp = so_sototcpcb(so);
-	struct sockbuf *rcv = so_sockbuf_rcv(so);
-	
-	flags = flagsp ? *flagsp &~ MSG_EOR : 0;
-	
-	/*
-	 * In order to use DMA direct from userspace the following
-	 * conditions must be met:
-	 *  - the connection is currently offloaded
-	 *  - ddp is enabled
-	 *  - the number of bytes to be transferred exceeds the threshold
-	 *  - the number of bytes currently in flight won't exceed the in-flight
-	 *    threshold XXX TODO
-	 *  - vm_fault_quick_hold_pages succeeds
-	 *  - blocking socket XXX for now
-	 *  - iovcnt is 1
-	 *
-	 */
-	if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
-	    && (uio->uio_iovcnt == 1) && (mp0 == NULL) &&
-	    ((rcv->sb_flags & SB_IN_TOE) || (uio->uio_iovcnt == 1))) {
-		struct toepcb *toep = tp->t_toe;
-		
-		tdev =  toep->tp_toedev;
-		zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
-		zcopy_enabled = TOM_TUNABLE(tdev, ddp);
-		if ((rcv->sb_flags & SB_IN_TOE) ||((uio->uio_resid > zcopy_thres) &&
-			(uio->uio_iovcnt == 1) && zcopy_enabled)) {
-			CTR4(KTR_TOM, "cxgb_soreceive: sb_flags=0x%x t_flags=0x%x flags=0x%x uio_resid=%d",
-			    rcv->sb_flags, tp->t_flags, flags, uio->uio_resid);
-			rv = t3_soreceive(so, flagsp, uio);
-			if (rv != EAGAIN)
-				return (rv);
-			else
-				printf("returned EAGAIN\n");
-		} 
-	} else if (tp && (tp->t_flags & TF_TOE) && uio && mp0 == NULL) {
-		struct sockbuf *rcv = so_sockbuf_rcv(so);
-		
-		log(LOG_INFO, "skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n",
-		    flags, uio->uio_iovcnt, rcv->sb_state);
-	}
-	
-	return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
-}
-
-struct protosw cxgb_protosw;
-struct pr_usrreqs cxgb_tcp_usrreqs;
-
-void
-t3_install_socket_ops(struct socket *so)
-{
-	static int copied = 0;
-	struct pr_usrreqs *pru;
-	struct protosw *psw;
-	
-	if (copied == 0) {
-		psw = so_protosw_get(so);	
-		pru = psw->pr_usrreqs;
-
-		bcopy(psw, &cxgb_protosw, sizeof(*psw));
-		bcopy(pru, &cxgb_tcp_usrreqs, sizeof(*pru));
-
-		cxgb_protosw.pr_ctloutput = t3_ctloutput;
-		cxgb_protosw.pr_usrreqs = &cxgb_tcp_usrreqs;
-		cxgb_tcp_usrreqs.pru_sosend = cxgb_sosend;
-		cxgb_tcp_usrreqs.pru_soreceive = cxgb_soreceive;
-	}
-	so_protosw_set(so, &cxgb_protosw);
-	
-#if 0	
-	so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
-	so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
-#endif
-}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,738 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/sockstate.h>
-#include <sys/sockopt.h>
-#include <sys/socket.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
-#include <sys/uio.h>
-
-#include <machine/bus.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
-#include <ulp/tom/cxgb_tcp_offload.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-#define MAX_SCHEDULE_TIMEOUT	300
-
-/*
- * Return the # of page pods needed to accommodate a # of pages.
- */
-static inline unsigned int
-pages2ppods(unsigned int pages)
-{
-	return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
-}
-
-/**
- *	t3_pin_pages - pin a user memory range and prepare it for DDP
- *	@addr - the starting address
- *	@len - the length of the range
- *	@newgl - contains the pages and physical addresses of the pinned range
- *	@gl - an existing gather list, may be %NULL
- *
- *	Pins the pages in the user-space memory range [addr, addr + len) and
- *	maps them for DMA.  Returns a gather list with the pinned pages and
- *	their physical addresses.  If @gl is non NULL the pages it describes
- *	are compared against the pages for [addr, addr + len), and if the
- *	existing gather list already covers the range a new list is not
- *	allocated.  Returns 0 on success, or a negative errno.  On success if
- *	a new gather list was allocated it is returned in @newgl.
- */ 
-static int
-t3_pin_pages(bus_dma_tag_t tag, bus_dmamap_t dmamap, vm_offset_t addr,
-    size_t len, struct ddp_gather_list **newgl,
-    const struct ddp_gather_list *gl)
-{
-	int i = 0, err;
-	size_t pg_off;
-	unsigned int npages;
-	struct ddp_gather_list *p;
-	vm_map_t map;
-	
-	pg_off = addr & PAGE_MASK;
-	npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
-	    M_DEVBUF, M_NOWAIT|M_ZERO);
-	if (p == NULL)
-		return (ENOMEM);
-
-	map = &curthread->td_proc->p_vmspace->vm_map;
-	if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
-	    VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
-		err = EFAULT;
-		goto free_gl;
-	}
-
-	if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
-	    gl->dgl_length >= len) {
-		for (i = 0; i < npages; i++)
-			if (p->dgl_pages[i] != gl->dgl_pages[i])
-				goto different_gl;
-		err = 0;
-		goto unpin;
-	}
-
-different_gl:
-	p->dgl_length = len;
-	p->dgl_offset = pg_off;
-	p->dgl_nelem = npages;
-#ifdef NEED_BUSDMA
-	p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
-				       PAGE_SIZE - pg_off,
-				       PCI_DMA_FROMDEVICE) - pg_off;
-	for (i = 1; i < npages; ++i)
-		p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE,
-					       PCI_DMA_FROMDEVICE);
-#endif	
-	*newgl = p;
-	return (0);
-unpin:
-	vm_page_unhold_pages(p->dgl_pages, npages);
-
-free_gl:
-	
-	free(p, M_DEVBUF);
-	*newgl = NULL;
-	return (err);
-}
-
-static void
-unmap_ddp_gl(const struct ddp_gather_list *gl)
-{
-#ifdef NEED_BUSDMA	
-	int i;
-
-	if (!gl->nelem)
-		return;
-
-	pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
-		       PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
-	for (i = 1; i < gl->nelem; ++i)
-		pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
-			       PCI_DMA_FROMDEVICE);
-
-#endif
-}
-
-static void
-ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
-{
-	/*
-	 * XXX mark pages as dirty before unholding 
-	 */
-	vm_page_unhold_pages(gl->dgl_pages, gl->dgl_nelem);
-}
-
-void
-t3_free_ddp_gl(struct ddp_gather_list *gl)
-{
-	unmap_ddp_gl(gl);
-	ddp_gl_free_pages(gl, 0);
-	free(gl, M_DEVBUF);
-}
-
-/* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
-#define MAX_PPODS 64U
-
-/*
- * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in
- * the TCB.  We allocate page pods in multiples of PPOD_CLUSTER_SIZE.  First we
- * try to allocate enough page pods to accommodate the whole buffer, subject to
- * the MAX_PPODS limit.  If that fails we try to allocate PPOD_CLUSTER_SIZE page
- * pods before failing entirely.
- */
-static int
-alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p,
-			    unsigned long addr, unsigned int len)
-{
-	int err, tag, npages, nppods;
-	struct tom_data *d = TOM_DATA(toep->tp_toedev);
-
-#if 0
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif	
-	npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	nppods = min(pages2ppods(npages), MAX_PPODS);
-	nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
-	err = t3_alloc_ppods(d, nppods, &tag);
-	if (err && nppods > PPOD_CLUSTER_SIZE) {
-		nppods = PPOD_CLUSTER_SIZE;
-		err = t3_alloc_ppods(d, nppods, &tag);
-	}
-	if (err)
-		return (ENOMEM);
-
-	p->ubuf_nppods = nppods;
-	p->ubuf_tag = tag;
-#if NUM_DDP_KBUF == 1
-	t3_set_ddp_tag(toep, 1, tag << 6);
-#endif
-	return (0);
-}
-
-/*
- * Starting offset for the user DDP buffer.  A non-0 value ensures a DDP flush
- * won't block indefinitely if there's nothing to place (which should be rare).
- */
-#define UBUF_OFFSET 1
-
-static __inline unsigned long
-select_ddp_flags(const struct toepcb *toep, int buf_idx,
-                 int nonblock, int rcv_flags)
-{
-	if (buf_idx == 1) {
-		if (__predict_false(rcv_flags & MSG_WAITALL))
-			return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
-			       V_TF_DDP_PSH_NO_INVALIDATE1(1) |
-			       V_TF_DDP_PUSH_DISABLE_1(1);
-		if (nonblock)
-			return V_TF_DDP_BUF1_FLUSH(1);
-
-		return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev,
-							ddp_push_wait));
-	}
-
-	if (__predict_false(rcv_flags & MSG_WAITALL))
-		return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
-		       V_TF_DDP_PSH_NO_INVALIDATE1(1) |
-		       V_TF_DDP_PUSH_DISABLE_0(1);
-	if (nonblock)
-		return V_TF_DDP_BUF0_FLUSH(1);
-
-	return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait));
-}
-
-/*
- * Reposts the kernel DDP buffer after it has been previously become full and
- * invalidated.  We just need to reset the offset and adjust the DDP flags.
- * Conveniently, we can set the flags and the offset with a single message.
- * Note that this function does not set the buffer length.  Again conveniently
- * our kernel buffer is of fixed size.  If the length needs to be changed it
- * needs to be done separately.
- */
-static void
-t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate, 
-    int activate, int nonblock)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-	unsigned long flags;
-
-#if 0	
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif	
-	p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
-	p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
-	p->buf_state[bufidx].gl = p->kbuf[bufidx];
-	p->cur_buf = bufidx;
-	p->kbuf_idx = bufidx;
-
-	flags = select_ddp_flags(toep, bufidx, nonblock, 0);
-	if (!bufidx)
-		t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
-			 V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
-			 V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
-		         V_TF_DDP_BUF0_VALID(1),
-		         V_TF_DDP_BUF0_FLUSH(1) |
-			 V_TF_DDP_PSH_NO_INVALIDATE0(1) |
-		         V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
-			 V_TF_DDP_BUF0_VALID(1) |
-			 V_TF_DDP_ACTIVE_BUF(activate), modulate);
-	else
-		t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
-			 V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |	
-		         V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) | 
-			 V_TF_DDP_BUF1_VALID(1) | 
-			 V_TF_DDP_ACTIVE_BUF(activate),
-		         V_TF_DDP_BUF1_FLUSH(1) | 
-			 V_TF_DDP_PSH_NO_INVALIDATE0(1) |
-		         V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
-			 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 
-			 modulate);
-	
-}
-
-/**
- * setup_uio_ppods - setup HW page pods for a user iovec
- * @sk: the associated socket
- * @uio: the uio
- * @oft: additional bytes to map before the start of the buffer
- *
- * Pins a user iovec and sets up HW page pods for DDP into it.  We allocate
- * page pods for user buffers on the first call per socket.  Afterwards we
- * limit the buffer length to whatever the existing page pods can accommodate.
- * Returns a negative error code or the length of the mapped buffer.
- *
- * The current implementation handles iovecs with only one entry.
- */
-static int
-setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length)
-{
-	int err;
-	unsigned int len;
-	struct ddp_gather_list *gl = NULL;
-	struct ddp_state *p = &toep->tp_ddp_state;
-	struct iovec *iov = uio->uio_iov;
-	vm_offset_t addr = (vm_offset_t)iov->iov_base - oft;
-
-#ifdef notyet	
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif
-	if (__predict_false(p->ubuf_nppods == 0)) {
-		err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft);
-		if (err)
-			return (err);
-	}
-
-	len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE;
-	len -= addr & PAGE_MASK;
-	if (len > M_TCB_RX_DDP_BUF0_LEN)
-		len = M_TCB_RX_DDP_BUF0_LEN;
-	len = min(len, toep->tp_tp->rcv_wnd - 32768);
-	len = min(len, iov->iov_len + oft);
-
-	if (len <= p->kbuf[0]->dgl_length) {
-		printf("length too short\n");
-		return (EINVAL);
-	}
-	
-	err = t3_pin_pages(toep->tp_rx_dmat, toep->tp_dmamap, addr, len, &gl, p->ubuf);
-	if (err)
-		return (err);
-	if (gl) {
-		if (p->ubuf)
-			t3_free_ddp_gl(p->ubuf);
-		p->ubuf = gl;
-		t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
-			       gl->dgl_offset, 0);
-	}
-	*length = len;
-	return (0);
-}
-
-/*
- * 
- */
-void
-t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-	int ubuf_pending = t3_ddp_ubuf_pending(toep);
-	int err = 0, count = 0;
-	
-	if (p->ubuf == NULL)
-		return;
-	
-	sockbuf_lock_assert(rcv);
-
-	p->cancel_ubuf = 1;
-	while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) {
-		CTR3(KTR_TOM,
-		  "t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
-		  p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), 
-		  p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
-		  p->get_tcb_count);	
-		if (p->get_tcb_count == 0)
-			t3_cancel_ddpbuf(toep, p->cur_buf);
-		else
-			CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d",
-			    err, p->get_tcb_count, rcv->sb_timeo, rcv,
-			    !!(rcv->sb_state & SBS_CANTRCVMORE));
-		
-		while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) {
-			if (count & 0xfffffff)
-				CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d",
-				    err, p->get_tcb_count, rcv->sb_timeo, rcv, count);
-			count++;
-			err = sbwait(rcv);
-		}
-		ubuf_pending = t3_ddp_ubuf_pending(toep);
-	}
-	p->cancel_ubuf = 0;
-	p->user_ddp_pending = 0;
-
-}
-
-#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \
-	              V_TF_DDP_PSH_NO_INVALIDATE1(1) | \
-		      V_TF_DDP_BUF1_FLUSH(1) | \
-		      V_TF_DDP_BUF0_FLUSH(1) | \
-		      V_TF_DDP_PUSH_DISABLE_1(1) | \
-		      V_TF_DDP_PUSH_DISABLE_0(1) | \
-		      V_TF_DDP_INDICATE_OUT(1))
-
-/*
- * Post a user buffer as an overlay on top of the current kernel buffer.
- */
-int
-t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
-    const struct uio *uio, int nonblock, int rcv_flags,
-    int modulate, int post_kbuf)
-{
-	int err, len, ubuf_idx;
-	unsigned long flags;
-	struct ddp_state *p = &toep->tp_ddp_state;
-
-	if (p->kbuf[0] == NULL) {
-		return (EINVAL);
-	}
-	sockbuf_unlock(rcv);
-	err = setup_uio_ppods(toep, uio, 0, &len);
-	sockbuf_lock(rcv);
-	if (err)
-		return (err);
-	
-	if ((rcv->sb_state & SBS_CANTRCVMORE) ||
-	    (toep->tp_tp->t_flags & TF_TOE) == 0) 
-		return (EINVAL);
-		
-	ubuf_idx = p->kbuf_idx;
-	p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
-	/* Use existing offset */
-	/* Don't need to update .gl, user buffer isn't copied. */
-	p->cur_buf = ubuf_idx;
-
-	flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags);
-
-	if (post_kbuf) {
-		struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
-		
-		dbs->cur_offset = 0;
-		dbs->flags = 0;
-		dbs->gl = p->kbuf[ubuf_idx ^ 1];
-		p->kbuf_idx ^= 1;
-		flags |= p->kbuf_idx ?
-		    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) :
-		    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0);
-	}
-	
-	if (ubuf_idx == 0) {
-		t3_overlay_ddpbuf(toep, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6,
-				  len);
-		t3_setup_ddpbufs(toep, 0, 0, p->kbuf[1]->dgl_length, 0,
-				 flags,
-				 OVERLAY_MASK | flags, 1);
-	} else {
-		t3_overlay_ddpbuf(toep, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6,
-				  len);
-		t3_setup_ddpbufs(toep, p->kbuf[0]->dgl_length, 0, 0, 0,
-				 flags,
-				 OVERLAY_MASK | flags, 1);
-	}
-#ifdef T3_TRACE
-	T3_TRACE5(TIDTB(so),
-		  "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
-		  " kbuf_idx %d",
-		   p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
-#endif
-	CTR3(KTR_TOM,
-	    "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x",
-	    p->ubuf_tag, flags, OVERLAY_MASK);
-	CTR3(KTR_TOM,
-	    "t3_overlay_ubuf:  ubuf_idx %d kbuf_idx %d post_kbuf %d",
-	    ubuf_idx, p->kbuf_idx, post_kbuf);
-	    
-	return (0);
-}
-
-/*
- * Clean up DDP state that needs to survive until socket close time, such as the
- * DDP buffers.  The buffers are already unmapped at this point as unmapping
- * needs the PCI device and a socket may close long after the device is removed.
- */
-void
-t3_cleanup_ddp(struct toepcb *toep)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-	int idx;
-
-	for (idx = 0; idx < NUM_DDP_KBUF; idx++)
-		if (p->kbuf[idx]) {
-			ddp_gl_free_pages(p->kbuf[idx], 0);
-			free(p->kbuf[idx], M_DEVBUF);
-		}
-	if (p->ubuf) {
-		ddp_gl_free_pages(p->ubuf, 0);
-		free(p->ubuf, M_DEVBUF);
-		p->ubuf = NULL;
-	}
-	toep->tp_ulp_mode = 0;
-}
-
-/*
- * This is a companion to t3_cleanup_ddp() and releases the HW resources
- * associated with a connection's DDP state, such as the page pods.
- * It's called when HW is done with a connection.   The rest of the state
- * remains available until both HW and the app are done with the connection.
- */
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-	struct tom_data *d = TOM_DATA(toep->tp_toedev);
-	int idx;
-	
-	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
-		t3_free_ppods(d, p->kbuf_tag[idx], 
-		    p->kbuf_nppods[idx]);
-		unmap_ddp_gl(p->kbuf[idx]);
-	}
-
-	if (p->ubuf_nppods) {
-		t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
-		p->ubuf_nppods = 0;
-	}
-	if (p->ubuf)
-		unmap_ddp_gl(p->ubuf);
-	
-}
-
-void
-t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-
-	t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
-	t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
-	t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock);
-#ifdef T3_TRACE
-	T3_TRACE1(TIDTB(so),
-		  "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
-#endif
-	CTR1(KTR_TOM,
-		  "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
-}
-
-/*
- * Prepare a socket for DDP.  Must be called when the socket is known to be
- * open.
- */
-int
-t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
-{
-	int i, err = ENOMEM;
-	static vm_pindex_t color;
-	unsigned int nppods, kbuf_pages, idx = 0;
-	struct ddp_state *p = &toep->tp_ddp_state;
-	struct tom_data *d = TOM_DATA(toep->tp_toedev);
-
-	
-	if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
-		return (EINVAL);
-
-#ifdef notyet	
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif	
-	kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	nppods = pages2ppods(kbuf_pages);
-
-	p->kbuf_noinval = !!waitall;
-	p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
-	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
-		p->kbuf[idx] = 
-		    malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
-			sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
-		if (p->kbuf[idx] == NULL)
-			goto err;
-		err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
-		if (err) {
-			printf("t3_alloc_ppods failed err=%d\n", err);
-			goto err;
-		}
-		
-		p->kbuf_nppods[idx] = nppods;
-		p->kbuf[idx]->dgl_length = kbuf_size;
-		p->kbuf[idx]->dgl_offset = 0;
-		p->kbuf[idx]->dgl_nelem = kbuf_pages;
-
-		for (i = 0; i < kbuf_pages; ++i) {
-			p->kbuf[idx]->dgl_pages[i] = vm_page_alloc(NULL, color,
-			    VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
-			    VM_ALLOC_ZERO);
-			if (p->kbuf[idx]->dgl_pages[i] == NULL) {
-				p->kbuf[idx]->dgl_nelem = i;
-				printf("failed to allocate kbuf pages\n");
-				goto err;
-			}
-		}
-#ifdef NEED_BUSDMA
-		/*
-		 * XXX we'll need this for VT-d or any platform with an iommu :-/
-		 *
-		 */
-		for (i = 0; i < kbuf_pages; ++i)
-			p->kbuf[idx]->phys_addr[i] = 
-			    pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
-					 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-#endif
-		t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx], 
-			       p->kbuf[idx]->dgl_length, 0, 0);
-	}
-	cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
-
-	t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
-	t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
-	t3_repost_kbuf(toep, 0, 0, 1, nonblock);
-
-	t3_set_rcv_coalesce_enable(toep, 
-	    TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
-	t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
-	
-#ifdef T3_TRACE
-	T3_TRACE4(TIDTB(so),
-		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
-		   kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
-#endif
-	CTR4(KTR_TOM,
-		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
-		   kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
-	cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
-	return (0);
-
-err:
-	t3_release_ddp_resources(toep);
-	t3_cleanup_ddp(toep);
-	return (err);
-}
-
-int
-t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len)
-{
-	int resid_init, err;
-	struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
-	
-	resid_init = uio->uio_resid;
-	
-	if (!gl->dgl_pages)
-		panic("pages not set\n");
-
-	CTR4(KTR_TOM, "t3_ddp_copy: offset=%d dgl_offset=%d cur_offset=%d len=%d",
-	    offset, gl->dgl_offset, m->m_cur_offset, len);
-	offset += gl->dgl_offset + m->m_cur_offset;
-	KASSERT(len <= gl->dgl_length,
-	    ("len=%d > dgl_length=%d in ddp_copy\n", len, gl->dgl_length));
-
-
-	err = uiomove_fromphys(gl->dgl_pages, offset, len, uio);
-	return (err);
-}
-
-
-/*
- * Allocate n page pods.  Returns -1 on failure or the page pod tag.
- */
-int
-t3_alloc_ppods(struct tom_data *td, unsigned int n, int *ptag)
-{
-	unsigned int i, j;
-
-	if (__predict_false(!td->ppod_map)) {
-		printf("ppod_map not set\n");
-		return (EINVAL);
-	}
-
-	mtx_lock(&td->ppod_map_lock);
-	for (i = 0; i < td->nppods; ) {
-		
-		for (j = 0; j < n; ++j)           /* scan ppod_map[i..i+n-1] */
-			if (td->ppod_map[i + j]) {
-				i = i + j + 1;
-				goto next;
-			}
-		memset(&td->ppod_map[i], 1, n);   /* allocate range */
-		mtx_unlock(&td->ppod_map_lock);
-		CTR2(KTR_TOM,
-		    "t3_alloc_ppods: n=%u tag=%u", n, i);
-		*ptag = i;
-		return (0);
-	next: ;
-	}
-	mtx_unlock(&td->ppod_map_lock);
-	return (0);
-}
-
-void
-t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
-{
-	/* No need to take ppod_lock here */
-	memset(&td->ppod_map[tag], 0, n);
-}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_defs.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_defs.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
-#ifndef CXGB_DEFS_H_
-#define CXGB_DEFS_H_
-
-#define VALIDATE_TID 0
-
-#define TOEPCB(so)  ((struct toepcb *)(sototcpcb((so))->t_toe))
-#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
-#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
-#define sototoep(so) (sototcpcb((so))->t_toe)
-
-#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
-#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
-	
-#define	KTR_TOM	KTR_SPARE2
-#define	KTR_TCB	KTR_SPARE3
-
-struct toepcb;
-struct listen_ctx;
-
-void cxgb_log_tcb(struct adapter *sc, unsigned int tid);
-typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
-
-void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
-void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
-void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
-int t3_push_frames(struct socket *so, int req_completion);
-int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
-	struct sockaddr *nam);
-void t3_init_listen_cpl_handlers(void);
-int t3_init_cpl_io(void);
-void t3_init_wr_tab(unsigned int wr_len);
-uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
-void t3_send_rx_modulate(struct toepcb *toep);
-void t3_cleanup_rbuf(struct tcpcb *tp, int copied);
-
-void t3_init_socket_ops(void);
-void t3_install_socket_ops(struct socket *so);
-
-
-void t3_disconnect_acceptq(struct socket *listen_so);
-void t3_reset_synq(struct listen_ctx *ctx);
-void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
-
-struct toepcb *toepcb_alloc(void);
-void toepcb_hold(struct toepcb *);
-void toepcb_release(struct toepcb *);
-void toepcb_init(struct toepcb *);
-
-void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off);
-void t3_set_dack_mss(struct toepcb *toep, int on);
-void t3_set_keepalive(struct toepcb *toep, int on_off);
-void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag);
-void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
-		    unsigned int len);
-int t3_get_tcb(struct toepcb *toep);
-
-int t3_ctloutput(struct socket *so, struct sockopt *sopt);
-
-#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,76 +1,61 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
- 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#if __FreeBSD_version > 700000
-#include <sys/rwlock.h>
-#endif
-
 #include <sys/socket.h>
 #include <net/if.h>
 #include <net/ethernet.h>
 #include <net/if_vlan_var.h>
-#include <net/if_dl.h>
-#include <net/route.h>
 #include <netinet/in.h>
-#include <netinet/if_ether.h>
+#include <netinet/toecore.h>
 
-#include <cxgb_include.h>
-#include <ulp/tom/cxgb_l2t.h>
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
 
-#define VLAN_NONE 0xfff
-#define SDL(s) ((struct sockaddr_dl *)s) 
-#define RT_ENADDR(sa)  ((u_char *)LLADDR(SDL((sa))))
-#define rt_expire rt_rmx.rmx_expire 
-
-struct llinfo_arp { 
-        struct  callout la_timer; 
-        struct  rtentry *la_rt; 
-        struct  mbuf *la_hold;  /* last packet until resolved/timeout */ 
-        u_short la_preempt;     /* countdown for pre-expiry arps */ 
-        u_short la_asked;       /* # requests sent */ 
-}; 
+#define VLAN_NONE	0xfff
+#define SA(x)		((struct sockaddr *)(x))
+#define SIN(x)		((struct sockaddr_in *)(x))
+#define SINADDR(x)	(SIN(x)->sin_addr.s_addr)
 
 /*
  * Module locking notes:  There is a RW lock protecting the L2 table as a
- * whole plus a spinlock per L2T entry.  Entry lookups and allocations happen
+ * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
  * under the protection of the table lock, individual entry changes happen
- * while holding that entry's spinlock.  The table lock nests outside the
+ * while holding that entry's mutex.  The table lock nests outside the
  * entry locks.  Allocations of new entries take the table lock as writers so
  * no other lookups can happen while allocating new entries.  Entry updates
  * take the table lock as readers so multiple entries can be updated in
@@ -78,72 +63,60 @@
  * and therefore can happen in parallel with entry allocation but no entry
  * can change state or increment its ref count during allocation as both of
  * these perform lookups.
+ *
+ * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
  */
 
 static inline unsigned int
-vlan_prio(const struct l2t_entry *e)
-{
-	return e->vlan >> 13;
-}
-
-static inline unsigned int
 arp_hash(u32 key, int ifindex, const struct l2t_data *d)
 {
 	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
 }
 
-static inline void
-neigh_replace(struct l2t_entry *e, struct llentry *neigh)
-{
-	LLE_WLOCK(neigh);
-	LLE_ADDREF(neigh);
-	LLE_WUNLOCK(neigh);
-	
-	if (e->neigh)
-		LLE_FREE(e->neigh);
-	e->neigh = neigh;
-}
-
 /*
- * Set up an L2T entry and send any packets waiting in the arp queue.  The
- * supplied mbuf is used for the CPL_L2T_WRITE_REQ.  Must be called with the
- * entry locked.
+ * Set up an L2T entry and send any packets waiting in the arp queue.  Must be
+ * called with the entry locked.
  */
 static int
-setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
-    struct l2t_entry *e)
+setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
 {
+	struct mbuf *m;
 	struct cpl_l2t_write_req *req;
+	struct port_info *pi = &sc->port[e->smt_idx];	/* smt_idx is port_id */
 
-	if (!m) {
-		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
-		    return (ENOMEM);
+	mtx_assert(&e->lock, MA_OWNED);
+
+	m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
+	if (m == NULL) {
+		log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
+		    __func__, e->idx);
+		return (ENOMEM);
 	}
-	/*
-	 * XXX MH_ALIGN
-	 */
-	req = mtod(m, struct cpl_l2t_write_req *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
 	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
-			    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
-			    V_L2T_W_PRIO(vlan_prio(e)));
+	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
+	    V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
+	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
 
-	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
-	m_set_priority(m, CPL_PRIORITY_CONTROL);
-	cxgb_ofld_send(dev, m);
+	t3_offload_tx(sc, m);
+
+	/*
+	 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ.  If any mbuf
+	 * on the arpq is going out via another queue set associated with the
+	 * port then it has a bad race with the L2T_WRITE_REQ.  Ideally we
+	 * should wait till the reply to the write before draining the arpq.
+	 */
 	while (e->arpq_head) {
 		m = e->arpq_head;
 		e->arpq_head = m->m_next;
 		m->m_next = NULL;
-		cxgb_ofld_send(dev, m);
+		t3_offload_tx(sc, m);
 	}
 	e->arpq_tail = NULL;
-	e->state = L2T_STATE_VALID;
 
-	return 0;
+	return (0);
 }
 
 /*
@@ -153,6 +126,8 @@
 static inline void
 arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
 {
+	mtx_assert(&e->lock, MA_OWNED);
+
 	m->m_next = NULL;
 	if (e->arpq_head)
 		e->arpq_tail->m_next = m;
@@ -161,113 +136,149 @@
 	e->arpq_tail = m;
 }
 
-int
-t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
+static void
+resolution_failed_mbuf(struct mbuf *m)
 {
-	struct llentry *lle =  e->neigh;
-	struct sockaddr_in sin;
+	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
+	    __func__, m, mtod(m, void *));
+}
 
-	bzero(&sin, sizeof(struct sockaddr_in));
+static void
+resolution_failed(struct l2t_entry *e)
+{
+	struct mbuf *m;
+
+	mtx_assert(&e->lock, MA_OWNED);
+
+	while (e->arpq_head) {
+		m = e->arpq_head;
+		e->arpq_head = m->m_next;
+		m->m_next = NULL;
+		resolution_failed_mbuf(m);
+	}
+	e->arpq_tail = NULL;
+}
+
+static void
+update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
+    uint16_t vtag)
+{
+
+	mtx_assert(&e->lock, MA_OWNED);
+
+	/*
+	 * The entry may be in active use (e->refcount > 0) or not.  We update
+	 * it even when it's not as this simplifies the case where we decide to
+	 * reuse the entry later.
+	 */
+
+	if (lladdr == NULL &&
+	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
+		/*
+		 * Never got a valid L2 address for this one.  Just mark it as
+		 * failed instead of removing it from the hash (for which we'd
+		 * need to wlock the table).
+		 */
+		e->state = L2T_STATE_FAILED;
+		resolution_failed(e);
+		return;
+
+	} else if (lladdr == NULL) {
+
+		/* Valid or already-stale entry was deleted (or expired) */
+
+		KASSERT(e->state == L2T_STATE_VALID ||
+		    e->state == L2T_STATE_STALE,
+		    ("%s: lladdr NULL, state %d", __func__, e->state));
+
+		e->state = L2T_STATE_STALE;
+
+	} else {
+
+		if (e->state == L2T_STATE_RESOLVING ||
+		    e->state == L2T_STATE_FAILED ||
+		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {
+
+			/* unresolved -> resolved; or dmac changed */
+
+			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
+			e->vlan = vtag;
+			setup_l2e_send_pending(sc, e);
+		}
+		e->state = L2T_STATE_VALID;
+	}
+}
+
+static int
+resolve_entry(struct adapter *sc, struct l2t_entry *e)
+{
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	struct sockaddr_in sin = {0};
+	uint8_t dmac[ETHER_ADDR_LEN];
+	uint16_t vtag = EVL_VLID_MASK;
+	int rc;
+
 	sin.sin_family = AF_INET;
 	sin.sin_len = sizeof(struct sockaddr_in);
-	sin.sin_addr.s_addr = e->addr;
+	SINADDR(&sin) = e->addr;
 
-	CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
+	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
+	if (rc == EWOULDBLOCK)
+		return (rc);
+
+	mtx_lock(&e->lock);
+	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
+	mtx_unlock(&e->lock);
+
+	return (rc);
+}
+
+int
+t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
+{
+
 again:
 	switch (e->state) {
 	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
-		arpresolve(rt->rt_ifp, rt, NULL,
-		     (struct sockaddr *)&sin, e->dmac, &lle);
-		mtx_lock(&e->lock);
-		if (e->state == L2T_STATE_STALE)
-			e->state = L2T_STATE_VALID;
-		mtx_unlock(&e->lock);
+
+		if (resolve_entry(sc, e) != EWOULDBLOCK)
+			goto again;	/* entry updated, re-examine state */
+
+		/* Fall through */
+
 	case L2T_STATE_VALID:     /* fast-path, send the packet on */
-		return cxgb_ofld_send(dev, m);
+
+		return (t3_offload_tx(sc, m));
+
 	case L2T_STATE_RESOLVING:
 		mtx_lock(&e->lock);
-		if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
+		if (e->state != L2T_STATE_RESOLVING) {
 			mtx_unlock(&e->lock);
 			goto again;
 		}
 		arpq_enqueue(e, m);
 		mtx_unlock(&e->lock);
-		/*
-		 * Only the first packet added to the arpq should kick off
-		 * resolution.  However, because the m_gethdr below can fail,
-		 * we allow each packet added to the arpq to retry resolution
-		 * as a way of recovering from transient memory exhaustion.
-		 * A better way would be to use a work request to retry L2T
-		 * entries when there's no memory.
-		 */
-		if (arpresolve(rt->rt_ifp, rt, NULL,
-		     (struct sockaddr *)&sin, e->dmac, &lle) == 0) {
-			CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
-			    e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
-			
-			if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
-				return (ENOMEM);
 
-			mtx_lock(&e->lock);
-			if (e->arpq_head) 
-				setup_l2e_send_pending(dev, m, e);
-			else
-				m_freem(m);
-			mtx_unlock(&e->lock);
-		}
+		if (resolve_entry(sc, e) == EWOULDBLOCK)
+			break;
+
+		mtx_lock(&e->lock);
+		if (e->state == L2T_STATE_VALID && e->arpq_head)
+			setup_l2e_send_pending(sc, e);
+		if (e->state == L2T_STATE_FAILED)
+			resolution_failed(e);
+		mtx_unlock(&e->lock);
+		break;
+
+	case L2T_STATE_FAILED:
+		resolution_failed_mbuf(m);
+		return (EHOSTUNREACH);
 	}
-	return 0;
+
+	return (0);
 }
 
-void
-t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
-{
-	struct mbuf *m0;
-	struct sockaddr_in sin;
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof(struct sockaddr_in);
-	sin.sin_addr.s_addr = e->addr;
-	struct llentry *lle;
-	
-	if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
-		return;
-
-	rt = e->neigh;
-again:
-	switch (e->state) {
-	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
-		arpresolve(rt->rt_ifp, rt, NULL,
-		     (struct sockaddr *)&sin, e->dmac, &lle);
-		mtx_lock(&e->lock);
-		if (e->state == L2T_STATE_STALE) {
-			e->state = L2T_STATE_VALID;
-		}
-		mtx_unlock(&e->lock);
-		return;
-	case L2T_STATE_VALID:     /* fast-path, send the packet on */
-		return;
-	case L2T_STATE_RESOLVING:
-		mtx_lock(&e->lock);
-		if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
-			mtx_unlock(&e->lock);
-			goto again;
-		}
-		mtx_unlock(&e->lock);
-		
-		/*
-		 * Only the first packet added to the arpq should kick off
-		 * resolution.  However, because the alloc_skb below can fail,
-		 * we allow each packet added to the arpq to retry resolution
-		 * as a way of recovering from transient memory exhaustion.
-		 * A better way would be to use a work request to retry L2T
-		 * entries when there's no memory.
-		 */
-		arpresolve(rt->rt_ifp, rt, NULL,
-		    (struct sockaddr *)&sin, e->dmac, &lle);
-
-	}
-	return;
-}
 /*
  * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
  */
@@ -276,15 +287,19 @@
 {
 	struct l2t_entry *end, *e, **p;
 
+	rw_assert(&d->lock, RA_WLOCKED);
+
 	if (!atomic_load_acq_int(&d->nfree))
-		return NULL;
+		return (NULL);
 
 	/* there's definitely a free entry */
-	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
+	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
 		if (atomic_load_acq_int(&e->refcnt) == 0)
 			goto found;
+	}
 
-	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
+	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
+		continue;
 found:
 	d->rover = e + 1;
 	atomic_add_int(&d->nfree, -1);
@@ -294,90 +309,37 @@
 	 * presently in the hash table.  We need to remove it.
 	 */
 	if (e->state != L2T_STATE_UNUSED) {
-		int hash = arp_hash(e->addr, e->ifindex, d);
+		int hash = arp_hash(e->addr, e->ifp->if_index, d);
 
-		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
+		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
 			if (*p == e) {
 				*p = e->next;
 				break;
 			}
+		}
 		e->state = L2T_STATE_UNUSED;
 	}
-	
-	return e;
-}
 
-/*
- * Called when an L2T entry has no more users.  The entry is left in the hash
- * table since it is likely to be reused but we also bump nfree to indicate
- * that the entry can be reallocated for a different neighbor.  We also drop
- * the existing neighbor reference in case the neighbor is going away and is
- * waiting on our reference.
- *
- * Because entries can be reallocated to other neighbors once their ref count
- * drops to 0 we need to take the entry's lock to avoid races with a new
- * incarnation.
- */
-void
-t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
-{
-	struct llentry *lle;
-
-	mtx_lock(&e->lock);
-	if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
-		lle = e->neigh;
-		e->neigh = NULL;
-	}
-	
-	mtx_unlock(&e->lock);
-	atomic_add_int(&d->nfree, 1);
-	if (lle)
-		LLE_FREE(lle);
-}
-
-
-/*
- * Update an L2T entry that was previously used for the same next hop as neigh.
- * Must be called with softirqs disabled.
- */
-static inline void
-reuse_entry(struct l2t_entry *e, struct llentry *neigh)
-{
-
-	mtx_lock(&e->lock);                /* avoid race with t3_l2t_free */
-	if (neigh != e->neigh)
-		neigh_replace(e, neigh);
-	
-	if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
-	    (neigh->rt_expire > time_uptime))
-		e->state = L2T_STATE_RESOLVING;
-	else if (la->la_hold == NULL)
-		e->state = L2T_STATE_VALID;
-	else
-		e->state = L2T_STATE_STALE;
-	mtx_unlock(&e->lock);
+	return (e);
 }
 
 struct l2t_entry *
-t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp,
-	struct sockaddr *sa)
+t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
 {
+	struct tom_data *td = pi->adapter->tom_softc;
 	struct l2t_entry *e;
-	struct l2t_data *d = L2DATA(dev);
-	u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
-	int ifidx = ifp->if_index;
-	int hash = arp_hash(addr, ifidx, d);
-	unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
+	struct l2t_data *d = td->l2t;
+	uint32_t addr = SINADDR(sa);
+	int hash = arp_hash(addr, ifp->if_index, d);
+	unsigned int smt_idx = pi->port_id;
 
 	rw_wlock(&d->lock);
-	for (e = d->l2tab[hash].first; e; e = e->next)
-		if (e->addr == addr && e->ifindex == ifidx &&
-		    e->smt_idx == smt_idx) {
+	for (e = d->l2tab[hash].first; e; e = e->next) {
+		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
 			l2t_hold(d, e);
-			if (atomic_load_acq_int(&e->refcnt) == 1)
-				reuse_entry(e, neigh);
 			goto done;
 		}
+	}
 
 	/* Need to allocate a new entry */
 	e = alloc_l2e(d);
@@ -385,116 +347,59 @@
 		mtx_lock(&e->lock);          /* avoid race with t3_l2t_free */
 		e->next = d->l2tab[hash].first;
 		d->l2tab[hash].first = e;
-		rw_wunlock(&d->lock);
-		
+
 		e->state = L2T_STATE_RESOLVING;
 		e->addr = addr;
-		e->ifindex = ifidx;
+		e->ifp = ifp;
 		e->smt_idx = smt_idx;
 		atomic_store_rel_int(&e->refcnt, 1);
-		e->neigh = NULL;
-		
-		
-		neigh_replace(e, neigh);
-#ifdef notyet
-		/* 
-		 * XXX need to add accessor function for vlan tag
-		 */
-		if (neigh->rt_ifp->if_vlantrunk)
-			e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
-		else
-#endif			    
-			e->vlan = VLAN_NONE;
+
+		KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
+		e->vlan = VLAN_NONE;
+
 		mtx_unlock(&e->lock);
+	}
 
-		return (e);
-	}
-	
 done:
 	rw_wunlock(&d->lock);
-	return e;
-}
 
-/*
- * Called when address resolution fails for an L2T entry to handle packets
- * on the arpq head.  If a packet specifies a failure handler it is invoked,
- * otherwise the packets is sent to the TOE.
- *
- * XXX: maybe we should abandon the latter behavior and just require a failure
- * handler.
- */
-static void
-handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
-{
-
-	while (arpq) {
-		struct mbuf *m = arpq;
-#ifdef notyet		
-		struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
-#endif
-		arpq = m->m_next;
-		m->m_next = NULL;
-#ifdef notyet		
-		if (cb->arp_failure_handler)
-			cb->arp_failure_handler(dev, m);
-		else
-#endif			
-			cxgb_ofld_send(dev, m);
-	}
-
+	return (e);
 }
 
 void
-t3_l2t_update(struct t3cdev *dev, struct llentry *neigh,
-    uint8_t *enaddr, struct sockaddr *sa)
+t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+    uint8_t *lladdr, uint16_t vtag)
 {
+	struct tom_data *td = t3_tomdata(tod);
+	struct adapter *sc = tod->tod_softc;
 	struct l2t_entry *e;
-	struct mbuf *arpq = NULL;
-	struct l2t_data *d = L2DATA(dev);
-	u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
-	int hash = arp_hash(addr, ifidx, d);
-	struct llinfo_arp *la;
+	struct l2t_data *d = td->l2t;
+	u32 addr = *(u32 *) &SIN(sa)->sin_addr;
+	int hash = arp_hash(addr, ifp->if_index, d);
 
 	rw_rlock(&d->lock);
 	for (e = d->l2tab[hash].first; e; e = e->next)
-		if (e->addr == addr) {
+		if (e->addr == addr && e->ifp == ifp) {
 			mtx_lock(&e->lock);
 			goto found;
 		}
 	rw_runlock(&d->lock);
-	CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
+
+	/*
+	 * This is of no interest to us.  We've never had an offloaded
+	 * connection to this destination, and we aren't attempting one right
+	 * now.
+	 */
 	return;
 
 found:
-	printf("found 0x%08x\n", addr);
+	rw_runlock(&d->lock);
 
-	rw_runlock(&d->lock);
-	memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
-	printf("mac=%x:%x:%x:%x:%x:%x\n",
-	    e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
-	
-	if (atomic_load_acq_int(&e->refcnt)) {
-		if (neigh != e->neigh)
-			neigh_replace(e, neigh);
-		
-		la = (struct llinfo_arp *)neigh->rt_llinfo; 
-		if (e->state == L2T_STATE_RESOLVING) {
-			
-			if (la->la_asked >= 5 /* arp_maxtries */) {
-				arpq = e->arpq_head;
-				e->arpq_head = e->arpq_tail = NULL;
-			} else
-				setup_l2e_send_pending(dev, NULL, e);
-		} else {
-			e->state = L2T_STATE_VALID;
-			if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
-				setup_l2e_send_pending(dev, NULL, e);
-		}
-	}
+	KASSERT(e->state != L2T_STATE_UNUSED,
+	    ("%s: unused entry in the hash.", __func__));
+
+	update_entry(sc, e, lladdr, vtag);
 	mtx_unlock(&e->lock);
-
-	if (arpq)
-		handle_failed_resolution(dev, arpq);
 }
 
 struct l2t_data *
@@ -503,9 +408,9 @@
 	struct l2t_data *d;
 	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
 
-	d = cxgb_alloc_mem(size);
+	d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
 	if (!d)
-		return NULL;
+		return (NULL);
 
 	d->nentries = l2t_capacity;
 	d->rover = &d->l2tab[1];	/* entry 0 is not used */
@@ -515,10 +420,10 @@
 	for (i = 0; i < l2t_capacity; ++i) {
 		d->l2tab[i].idx = i;
 		d->l2tab[i].state = L2T_STATE_UNUSED;
-		mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
+		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
 		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
 	}
-	return d;
+	return (d);
 }
 
 void
@@ -530,5 +435,26 @@
 	for (i = 0; i < d->nentries; ++i) 
 		mtx_destroy(&d->l2tab[i].lock);
 
-	cxgb_free_mem(d);
+	free(d, M_CXGB);
 }
+
+static int
+do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
+
+	if (rpl->status != CPL_ERR_NONE)
+		log(LOG_ERR,
+		       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+		       rpl->status, GET_TID(rpl));
+
+	m_freem(m);
+	return (0);
+}
+
+void
+t3_init_l2t_cpl_handlers(struct adapter *sc)
+{
+	t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
+}
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_l2t.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_l2t.h	Wed Jul 25 17:04:43 2012 +0300
@@ -1,6 +1,6 @@
 /**************************************************************************
 
-Copyright (c) 2007-2008, Chelsio Inc.
+Copyright (c) 2007-2009, Chelsio Inc.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -25,32 +25,25 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_l2t.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 #ifndef _CHELSIO_L2T_H
 #define _CHELSIO_L2T_H
 
-#include <ulp/toecore/cxgb_toedev.h>
 #include <sys/lock.h>
-
-#if __FreeBSD_version > 700000
 #include <sys/rwlock.h>
-#else
-#define rwlock mtx
-#define rw_wlock(x) mtx_lock((x))
-#define rw_wunlock(x) mtx_unlock((x))
-#define rw_rlock(x) mtx_lock((x))
-#define rw_runlock(x) mtx_unlock((x))
-#define rw_init(x, str) mtx_init((x), (str), NULL, MTX_DEF)
-#define rw_destroy(x) mtx_destroy((x))
-#endif
 
 enum {
-	L2T_STATE_VALID,      /* entry is up to date */
-	L2T_STATE_STALE,      /* entry may be used but needs revalidation */
-	L2T_STATE_RESOLVING,  /* entry needs address resolution */
-	L2T_STATE_UNUSED      /* entry not in use */
+	L2T_SIZE = 2048
+};
+
+enum {
+	L2T_STATE_VALID,	/* entry is up to date */
+	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
+	L2T_STATE_RESOLVING,	/* entry needs address resolution */
+	L2T_STATE_FAILED,	/* failed to resolve */
+	L2T_STATE_UNUSED	/* entry not in use */
 };
 
 /*
@@ -64,18 +57,17 @@
 struct l2t_entry {
 	uint16_t state;               /* entry state */
 	uint16_t idx;                 /* entry index */
-	uint32_t addr;                /* dest IP address */
-	int ifindex;                  /* neighbor's net_device's ifindex */
+	uint32_t addr;                /* nexthop IP address */
+	struct ifnet *ifp;            /* outgoing interface */
 	uint16_t smt_idx;             /* SMT index */
 	uint16_t vlan;                /* VLAN TCI (id: bits 0-11, prio: 13-15 */
-	struct llentry *neigh;        /* associated neighbour */
 	struct l2t_entry *first;      /* start of hash chain */
 	struct l2t_entry *next;       /* next l2t_entry on chain */
 	struct mbuf *arpq_head;       /* queue of packets awaiting resolution */
 	struct mbuf *arpq_tail;
 	struct mtx lock;
 	volatile uint32_t refcnt;     /* entry reference count */
-	uint8_t dmac[6];              /* neighbour's MAC address */
+	uint8_t dmac[ETHER_ADDR_LEN]; /* nexthop's MAC address */
 };
 
 struct l2t_data {
@@ -86,76 +78,37 @@
 	struct l2t_entry l2tab[0];
 };
 
-typedef void (*arp_failure_handler_func)(struct t3cdev *dev,
-					 struct mbuf *m);
+void t3_l2e_free(struct l2t_data *, struct l2t_entry *e);
+void t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+    uint8_t *lladdr, uint16_t vtag);
+struct l2t_entry *t3_l2t_get(struct port_info *, struct ifnet *,
+    struct sockaddr *);
+int t3_l2t_send_slow(struct adapter *, struct mbuf *, struct l2t_entry *);
+struct l2t_data *t3_init_l2t(unsigned int);
+void t3_free_l2t(struct l2t_data *);
+void t3_init_l2t_cpl_handlers(struct adapter *);
 
-typedef void (*opaque_arp_failure_handler_func)(void *dev,
-					 struct mbuf *m);
-
-/*
- * Callback stored in an skb to handle address resolution failure.
- */
-struct l2t_mbuf_cb {
-	arp_failure_handler_func arp_failure_handler;
-};
-
-/*
- * XXX 
- */
-#define L2T_MBUF_CB(skb) ((struct l2t_mbuf_cb *)(skb)->cb)
-
-
-static __inline void set_arp_failure_handler(struct mbuf *m,
-					   arp_failure_handler_func hnd)
+static inline int
+l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
 {
-	m->m_pkthdr.header = (opaque_arp_failure_handler_func)hnd;
-
+	if (__predict_true(e->state == L2T_STATE_VALID))
+		return t3_offload_tx(sc, m);
+	else
+		return t3_l2t_send_slow(sc, m, e);
 }
 
-/*
- * Getting to the L2 data from an offload device.
- */
-#define L2DATA(dev) ((dev)->l2opt)
-
-void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
-void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
-struct l2t_entry *t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh,
-    struct ifnet *ifp, struct sockaddr *sa);
-int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m,
-		     struct l2t_entry *e);
-void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
-struct l2t_data *t3_init_l2t(unsigned int l2t_capacity);
-void t3_free_l2t(struct l2t_data *d);
-
-#ifdef CONFIG_PROC_FS
-int t3_l2t_proc_setup(struct proc_dir_entry *dir, struct l2t_data *d);
-void t3_l2t_proc_free(struct proc_dir_entry *dir);
-#else
-#define l2t_proc_setup(dir, d) 0
-#define l2t_proc_free(dir)
-#endif
-
-int cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m);
-
-static inline int l2t_send(struct t3cdev *dev, struct mbuf *m,
-			   struct l2t_entry *e)
+static inline void
+l2t_release(struct l2t_data *d, struct l2t_entry *e)
 {
-	if (__predict_true(e->state == L2T_STATE_VALID)) {
-		return cxgb_ofld_send(dev, (struct mbuf *)m);
-	}
-	return t3_l2t_send_slow(dev, (struct mbuf *)m, e);
-}
-
-static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e)
-{
-	if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
-		t3_l2e_free(d, e);
-}
-
-static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
-{
-	if (atomic_fetchadd_int(&e->refcnt, 1) == 1)  /* 0 -> 1 transition */
+	if (atomic_fetchadd_int(&e->refcnt, -1) == 1) /* 1 -> 0 transition */
 		atomic_add_int(&d->nfree, 1);
 }
 
+static inline void
+l2t_hold(struct l2t_data *d, struct l2t_entry *e)
+{
+	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
+		atomic_add_int(&d->nfree, -1);
+}
+
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_listen.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,280 +1,237 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_listen.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
 #include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/fcntl.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/mutex.h>
-
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
-
+#include <sys/refcount.h>
 #include <sys/socket.h>
-#include <sys/syslog.h>
-
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
 #include <net/if.h>
 #include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/tcp_var.h>
+#define TCPSTATES
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
 
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_toepcb.h"
 
+static void t3_send_reset_synqe(struct toedev *, struct synq_entry *);
 
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
+static int
+alloc_stid(struct tid_info *t, void *ctx)
+{
+	int stid = -1;
 
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
+	mtx_lock(&t->stid_lock);
+	if (t->sfree) {
+		union listen_entry *p = t->sfree;
 
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
+		stid = (p - t->stid_tab) + t->stid_base;
+		t->sfree = p->next;
+		p->ctx = ctx;
+		t->stids_in_use++;
+	}
+	mtx_unlock(&t->stid_lock);
+	return (stid);
+}
 
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
+static void
+free_stid(struct tid_info *t, int stid)
+{
+	union listen_entry *p = stid2entry(t, stid);
 
+	mtx_lock(&t->stid_lock);
+	p->next = t->sfree;
+	t->sfree = p;
+	t->stids_in_use--;
+	mtx_unlock(&t->stid_lock);
+}
 
-static struct listen_info *listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid);
-static int listen_hash_del(struct tom_data *d, struct socket *so);
+static struct listen_ctx *
+alloc_lctx(struct tom_data *td, struct inpcb *inp, int qset)
+{
+	struct listen_ctx *lctx;
 
-/*
- * Process a CPL_CLOSE_LISTSRV_RPL message.  If the status is good we release
- * the STID.
- */
-static int
-do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	struct cpl_close_listserv_rpl *rpl = cplhdr(m);
-	unsigned int stid = GET_TID(rpl);
+	INP_WLOCK_ASSERT(inp);
 
-	if (rpl->status != CPL_ERR_NONE)
-		log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
-		       "STID %u\n", rpl->status, stid);
-	else {
-		struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
+	lctx = malloc(sizeof(struct listen_ctx), M_CXGB, M_NOWAIT | M_ZERO);
+	if (lctx == NULL)
+		return (NULL);
 
-		cxgb_free_stid(cdev, stid);
-		free(listen_ctx, M_CXGB);
+	lctx->stid = alloc_stid(&td->tid_maps, lctx);
+	if (lctx->stid < 0) {
+		free(lctx, M_CXGB);
+		return (NULL);
 	}
 
-	return (CPL_RET_BUF_DONE);
+	lctx->inp = inp;
+	in_pcbref(inp);
+
+	lctx->qset = qset;
+	refcount_init(&lctx->refcnt, 1);
+	TAILQ_INIT(&lctx->synq);
+
+	return (lctx);
+}
+
+/* Don't call this directly, use release_lctx instead */
+static int
+free_lctx(struct tom_data *td, struct listen_ctx *lctx)
+{
+	struct inpcb *inp = lctx->inp;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(lctx->refcnt == 0,
+	    ("%s: refcnt %d", __func__, lctx->refcnt));
+	KASSERT(TAILQ_EMPTY(&lctx->synq),
+	    ("%s: synq not empty.", __func__));
+	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
+
+	CTR4(KTR_CXGB, "%s: stid %u, lctx %p, inp %p",
+	    __func__, lctx->stid, lctx, lctx->inp);
+
+	free_stid(&td->tid_maps, lctx->stid);
+	free(lctx, M_CXGB);
+
+	return in_pcbrele_wlocked(inp);
+}
+
+static void
+hold_lctx(struct listen_ctx *lctx)
+{
+
+	refcount_acquire(&lctx->refcnt);
+}
+
+static inline uint32_t
+listen_hashfn(void *key, u_long mask)
+{
+
+	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
 }
 
 /*
- * Process a CPL_PASS_OPEN_RPL message.  Remove the socket from the listen hash
- * table and free the STID if there was any error, otherwise nothing to do.
+ * Add a listen_ctx entry to the listen hash table.
  */
-static int
-do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+static void
+listen_hash_add(struct tom_data *td, struct listen_ctx *lctx)
 {
-       	struct cpl_pass_open_rpl *rpl = cplhdr(m);
+	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
 
-	if (rpl->status != CPL_ERR_NONE) {
-		int stid = GET_TID(rpl);
-		struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
-		struct tom_data *d = listen_ctx->tom_data;
-		struct socket *lso = listen_ctx->lso;
-
-#if VALIDATE_TID
-		if (!lso)
-			return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
-#endif
-		/*
-		 * Note: It is safe to unconditionally call listen_hash_del()
-		 * at this point without risking unhashing a reincarnation of
-		 * an already closed socket (i.e., there is no listen, close,
-		 * listen, free the sock for the second listen while processing
-		 * a message for the first race) because we are still holding
-		 * a reference on the socket.  It is possible that the unhash
-		 * will fail because the socket is already closed, but we can't
-		 * unhash the wrong socket because it is impossible for the
-		 * socket to which this message refers to have reincarnated.
-		 */
-		listen_hash_del(d, lso);
-		cxgb_free_stid(cdev, stid);
-#ifdef notyet
-		/*
-		 * XXX need to unreference the inpcb
-		 * but we have no way of knowing that other TOMs aren't referencing it 
-		 */
-		sock_put(lso);
-#endif
-		free(listen_ctx, M_CXGB);
-	}
-	return CPL_RET_BUF_DONE;
-}
-
-void
-t3_init_listen_cpl_handlers(void)
-{
-	t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
-	t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
-}
-
-static inline int
-listen_hashfn(const struct socket *so)
-{
-	return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
+	mtx_lock(&td->lctx_hash_lock);
+	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
+	td->lctx_count++;
+	mtx_unlock(&td->lctx_hash_lock);
 }
 
 /*
- * Create and add a listen_info entry to the listen hash table.  This and the
- * listen hash table functions below cannot be called from softirqs.
+ * Look for the listening socket's context entry in the hash and return it.
  */
-static struct listen_info *
-listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
+static struct listen_ctx *
+listen_hash_find(struct tom_data *td, struct inpcb *inp)
 {
-	struct listen_info *p;
+	int bucket = listen_hashfn(inp, td->listen_mask);
+	struct listen_ctx *lctx;
 
-	p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
-	if (p) {
-		int bucket = listen_hashfn(so);
+	mtx_lock(&td->lctx_hash_lock);
+	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
+		if (lctx->inp == inp)
+			break;
+	}
+	mtx_unlock(&td->lctx_hash_lock);
 
-		p->so = so;	/* just a key, no need to take a reference */
-		p->stid = stid;
-		mtx_lock(&d->listen_lock);		
-		p->next = d->listen_hash_tab[bucket];
-		d->listen_hash_tab[bucket] = p;
-		mtx_unlock(&d->listen_lock);
-	}
-	return p;
+	return (lctx);
 }
 
 /*
- * Given a pointer to a listening socket return its server TID by consulting
- * the socket->stid map.  Returns -1 if the socket is not in the map.
+ * Removes the listen_ctx structure for inp from the hash and returns it.
  */
-static int
-listen_hash_find(struct tom_data *d, struct socket *so)
+static struct listen_ctx *
+listen_hash_del(struct tom_data *td, struct inpcb *inp)
 {
-	int stid = -1, bucket = listen_hashfn(so);
-	struct listen_info *p;
+	int bucket = listen_hashfn(inp, td->listen_mask);
+	struct listen_ctx *lctx, *l;
 
-	mtx_lock(&d->listen_lock);
-	for (p = d->listen_hash_tab[bucket]; p; p = p->next)
-		if (p->so == so) {
-			stid = p->stid;
+	mtx_lock(&td->lctx_hash_lock);
+	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
+		if (lctx->inp == inp) {
+			LIST_REMOVE(lctx, link);
+			td->lctx_count--;
 			break;
 		}
-	mtx_unlock(&d->listen_lock);
-	return stid;
+	}
+	mtx_unlock(&td->lctx_hash_lock);
+
+	return (lctx);
 }
 
 /*
- * Delete the listen_info structure for a listening socket.  Returns the server
- * TID for the socket if it is present in the socket->stid map, or -1.
+ * Releases a hold on the lctx.  Must be called with the listening socket's inp
+ * locked.  The inp may be freed by this function and it returns NULL to
+ * indicate this.
  */
-static int
-listen_hash_del(struct tom_data *d, struct socket *so)
+static struct inpcb *
+release_lctx(struct tom_data *td, struct listen_ctx *lctx)
 {
-	int bucket, stid = -1;
-	struct listen_info *p, **prev;
+	struct inpcb *inp = lctx->inp;
+	int inp_freed = 0;
 
-	bucket = listen_hashfn(so);
-	prev  = &d->listen_hash_tab[bucket];
+	INP_WLOCK_ASSERT(inp);
+	if (refcount_release(&lctx->refcnt))
+		inp_freed = free_lctx(td, lctx);
 
-	mtx_lock(&d->listen_lock);
-	for (p = *prev; p; prev = &p->next, p = p->next)
-		if (p->so == so) {
-			stid = p->stid;
-			*prev = p->next;
-			free(p, M_CXGB);
-			break;
-		}
-	mtx_unlock(&d->listen_lock);
-	
-	return (stid);
+	return (inp_freed ? NULL : inp);
 }
 
-/*
- * Start a listening server by sending a passive open request to HW.
- */
-void
-t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+static int
+create_server(struct adapter *sc, struct listen_ctx *lctx)
 {
-	int stid;
 	struct mbuf *m;
 	struct cpl_pass_open_req *req;
-	struct tom_data *d = TOM_DATA(dev);
-	struct inpcb *inp = so_sotoinpcb(so);
-	struct listen_ctx *ctx;
+	struct inpcb *inp = lctx->inp;
 
-	if (!TOM_TUNABLE(dev, activated))
-		return;
+	m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
+	if (m == NULL)
+		return (ENOMEM);
 
-	if (listen_hash_find(d, so) != -1)
-		return;
-	
-	CTR1(KTR_TOM, "start listen on port %u", ntohs(inp->inp_lport));
-	ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT|M_ZERO);
-
-	if (!ctx)
-		return;
-
-	ctx->tom_data = d;
-	ctx->lso = so;
-	ctx->ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) ? ULP_MODE_TCPDDP : 0;
-	LIST_INIT(&ctx->synq_head);
-	
-	stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
-	if (stid < 0)
-		goto free_ctx;
-
-	m = m_gethdr(M_NOWAIT, MT_DATA);
-	if (m == NULL)
-		goto free_stid;
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	
-	if (!listen_hash_add(d, so, stid))
-		goto free_all;
-
-	req = mtod(m, struct cpl_pass_open_req *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
 	req->local_port = inp->inp_lport; 
 	memcpy(&req->local_ip, &inp->inp_laddr, 4);
 	req->peer_port = 0;
@@ -284,60 +241,900 @@
 	req->opt0l = htonl(V_RCV_BUFSIZ(16));
 	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
 
-	m_set_priority(m, CPL_PRIORITY_LISTEN); 
-	cxgb_ofld_send(cdev, m);
-	return;
+	t3_offload_tx(sc, m);
 
-free_all:
-	m_free(m);
-free_stid:
-	cxgb_free_stid(cdev, stid);
-#if 0	
-	sock_put(sk);
-#endif	
-free_ctx:
-	free(ctx, M_CXGB);
+	return (0);
+}
+
+static int
+destroy_server(struct adapter *sc, struct listen_ctx *lctx)
+{
+	struct mbuf *m;
+	struct cpl_close_listserv_req *req;
+
+	m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
+	if (m == NULL)
+		return (ENOMEM);
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
+	    lctx->stid));
+	req->cpu_idx = 0;
+
+	t3_offload_tx(sc, m);
+
+	return (0);
+}
+
+/*
+ * Process a CPL_CLOSE_LISTSRV_RPL message.  If the status is good we release
+ * the STID.
+ */
+static int
+do_close_server_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_close_listserv_rpl *rpl = mtod(m, void *);
+	unsigned int stid = GET_TID(rpl);
+	struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
+	struct inpcb *inp = lctx->inp;
+
+	CTR3(KTR_CXGB, "%s: stid %u, status %u", __func__, stid, rpl->status);
+
+	if (rpl->status != CPL_ERR_NONE) {
+		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
+		    __func__, rpl->status, stid);
+	} else {
+		INP_WLOCK(inp);
+		KASSERT(listen_hash_del(td, lctx->inp) == NULL,
+		    ("%s: inp %p still in listen hash", __func__, inp));
+		if (release_lctx(td, lctx) != NULL)
+			INP_WUNLOCK(inp);
+	}
+
+	m_freem(m);
+	return (0);
+}
+
+/*
+ * Process a CPL_PASS_OPEN_RPL message.  Remove the lctx from the listen hash
+ * table and free it if there was any error, otherwise nothing to do.
+ */
+static int
+do_pass_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+       	struct cpl_pass_open_rpl *rpl = mtod(m, void *);
+	int stid = GET_TID(rpl);
+	struct listen_ctx *lctx;
+	struct inpcb *inp;
+
+	/*
+	 * We get these replies also when setting up HW filters.  Just throw
+	 * those away.
+	 */
+	if (stid >= td->tid_maps.stid_base + td->tid_maps.nstids)
+		goto done;
+
+	lctx = lookup_stid(&td->tid_maps, stid);
+	inp = lctx->inp;
+
+	INP_WLOCK(inp);
+
+	CTR4(KTR_CXGB, "%s: stid %u, status %u, flags 0x%x",
+	    __func__, stid, rpl->status, lctx->flags);
+
+	lctx->flags &= ~LCTX_RPL_PENDING;
+
+	if (rpl->status != CPL_ERR_NONE) {
+		log(LOG_ERR, "%s: %s: hw listen (stid %d) failed: %d\n",
+		    __func__, device_get_nameunit(sc->dev), stid, rpl->status);
+	}
+
+#ifdef INVARIANTS
+	/*
+	 * If the inp has been dropped (listening socket closed) then
+	 * listen_stop must have run and taken the inp out of the hash.
+	 */
+	if (inp->inp_flags & INP_DROPPED) {
+		KASSERT(listen_hash_del(td, inp) == NULL,
+		    ("%s: inp %p still in listen hash", __func__, inp));
+	}
+#endif
+
+	if (inp->inp_flags & INP_DROPPED && rpl->status != CPL_ERR_NONE) {
+		if (release_lctx(td, lctx) != NULL)
+			INP_WUNLOCK(inp);
+		goto done;
+	}
+
+	/*
+	 * Listening socket stopped listening earlier and now the chip tells us
+	 * it has started the hardware listener.  Stop it; the lctx will be
+	 * released in do_close_server_rpl.
+	 */
+	if (inp->inp_flags & INP_DROPPED) {
+		destroy_server(sc, lctx);
+		INP_WUNLOCK(inp);
+		goto done;
+	}
+
+	/*
+	 * Failed to start hardware listener.  Take inp out of the hash and
+	 * release our reference on it.  An error message has been logged
+	 * already.
+	 */
+	if (rpl->status != CPL_ERR_NONE) {
+		listen_hash_del(td, inp);
+		if (release_lctx(td, lctx) != NULL)
+			INP_WUNLOCK(inp);
+		goto done;
+	}
+
+	/* hardware listener open for business */
+
+	INP_WUNLOCK(inp);
+done:
+	m_freem(m);
+	return (0);
+}
+
+static void
+pass_accept_req_to_protohdrs(const struct cpl_pass_accept_req *cpl,
+    struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
+{
+	const struct tcp_options *t3opt = &cpl->tcp_options;
+
+	bzero(inc, sizeof(*inc));
+	inc->inc_faddr.s_addr = cpl->peer_ip;
+	inc->inc_laddr.s_addr = cpl->local_ip;
+	inc->inc_fport = cpl->peer_port;
+	inc->inc_lport = cpl->local_port;
+
+	bzero(th, sizeof(*th));
+	th->th_sport = cpl->peer_port;
+	th->th_dport = cpl->local_port;
+	th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
+	th->th_flags = TH_SYN;
+
+	bzero(to, sizeof(*to));
+	if (t3opt->mss) {
+		to->to_flags |= TOF_MSS;
+		to->to_mss = be16toh(t3opt->mss);
+	}
+	if (t3opt->wsf) {
+		to->to_flags |= TOF_SCALE;
+		to->to_wscale = t3opt->wsf;
+	}
+	if (t3opt->tstamp)
+		to->to_flags |= TOF_TS;
+	if (t3opt->sack)
+		to->to_flags |= TOF_SACKPERM;
+}
+
+static inline void
+hold_synqe(struct synq_entry *synqe)
+{
+
+	refcount_acquire(&synqe->refcnt);
+}
+
+static inline void
+release_synqe(struct synq_entry *synqe)
+{
+
+	if (refcount_release(&synqe->refcnt))
+		m_freem(synqe->m);
+}
+
+/*
+ * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
+ * store some state temporarily.  There will be enough room in the mbuf's
+ * trailing space as the CPL is not that large.
+ *
+ * XXX: bad hack.
+ */
+static struct synq_entry *
+mbuf_to_synq_entry(struct mbuf *m)
+{
+	int len = roundup(sizeof (struct synq_entry), 8);
+	uint8_t *buf;
+	int buflen;
+
+	if (__predict_false(M_TRAILINGSPACE(m) < len)) {
+	    panic("%s: no room for synq_entry (%td, %d)\n", __func__,
+	    M_TRAILINGSPACE(m), len);
+	}
+
+	if (m->m_flags & M_EXT) {
+		buf = m->m_ext.ext_buf;
+		buflen = m->m_ext.ext_size;
+	} else if (m->m_flags & M_PKTHDR) {
+		buf = &m->m_pktdat[0];
+		buflen = MHLEN;
+	} else {
+		buf = &m->m_dat[0];
+		buflen = MLEN;
+	}
+
+	return ((void *)(buf + buflen - len));
+}
+
+#ifdef KTR
+#define REJECT_PASS_ACCEPT()	do { \
+	reject_reason = __LINE__; \
+	goto reject; \
+} while (0)
+#else
+#define REJECT_PASS_ACCEPT()	do { goto reject; } while (0)
+#endif
+
+/*
+ * The context associated with a tid entry via insert_tid could be a synq_entry
+ * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
+ */
+CTASSERT(offsetof(struct toepcb, tp_flags) == offsetof(struct synq_entry, flags));
+
+/*
+ * Handle a CPL_PASS_ACCEPT_REQ message.
+ */
+static int
+do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	const struct cpl_pass_accept_req *req = mtod(m, void *);
+	unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+	unsigned int tid = GET_TID(req);
+	struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
+	struct l2t_entry *e = NULL;
+	struct sockaddr_in nam;
+	struct rtentry *rt;
+	struct inpcb *inp;
+	struct socket *so;
+	struct port_info *pi;
+	struct ifnet *ifp;
+	struct in_conninfo inc;
+	struct tcphdr th;
+	struct tcpopt to;
+	struct synq_entry *synqe = NULL;
+	int i;
+#ifdef KTR
+	int reject_reason;
+#endif
+
+	CTR4(KTR_CXGB, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
+	    lctx);
+
+	pass_accept_req_to_protohdrs(req, &inc, &th, &to);
+
+	/*
+	 * Don't offload if the interface that received the SYN doesn't have
+	 * IFCAP_TOE enabled.
+	 */
+	pi = NULL;
+	for_each_port(sc, i) {
+		if (memcmp(sc->port[i].hw_addr, req->dst_mac, ETHER_ADDR_LEN))
+			continue;
+		pi = &sc->port[i];
+		break;
+	}
+	if (pi == NULL)
+		REJECT_PASS_ACCEPT();
+	ifp = pi->ifp;
+	if ((ifp->if_capenable & IFCAP_TOE4) == 0)
+		REJECT_PASS_ACCEPT();
+
+	/*
+	 * Don't offload if the outgoing interface for the route back to the
+	 * peer is not the same as the interface that received the SYN.
+	 */
+	bzero(&nam, sizeof(nam));
+	nam.sin_len = sizeof(nam);
+	nam.sin_family = AF_INET;
+	nam.sin_addr = inc.inc_faddr;
+	rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
+	if (rt == NULL)
+		REJECT_PASS_ACCEPT();
+	else {
+		struct sockaddr *nexthop;
+
+		RT_UNLOCK(rt);
+		nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
+		    (struct sockaddr *)&nam;
+		if (rt->rt_ifp == ifp)
+			e = t3_l2t_get(pi, rt->rt_ifp, nexthop);
+		RTFREE(rt);
+		if (e == NULL)
+			REJECT_PASS_ACCEPT();	/* no l2te, or ifp mismatch */
+	}
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+
+	/* Don't offload if the 4-tuple is already in use */
+	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		REJECT_PASS_ACCEPT();
+	}
+
+	inp = lctx->inp;	/* listening socket (not owned by the TOE) */
+	INP_WLOCK(inp);
+	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+		/*
+		 * The listening socket has closed.  The reply from the TOE to
+		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
+		 * resources tied to this listen context.
+		 */
+		INP_WUNLOCK(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		REJECT_PASS_ACCEPT();
+	}
+	so = inp->inp_socket;
+
+	/* Reuse the mbuf that delivered the CPL to us */
+	synqe = mbuf_to_synq_entry(m);
+	synqe->flags = TP_IS_A_SYNQ_ENTRY;
+	synqe->m = m;
+	synqe->lctx = lctx;
+	synqe->tid = tid;
+	synqe->e = e;
+	synqe->opt0h = calc_opt0h(so, 0, 0, e);
+	synqe->qset = pi->first_qset + (arc4random() % pi->nqsets);
+	SOCKBUF_LOCK(&so->so_rcv);
+	synqe->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	refcount_init(&synqe->refcnt, 1);
+	atomic_store_rel_int(&synqe->reply, RPL_OK);
+
+	insert_tid(td, synqe, tid);
+	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
+	hold_synqe(synqe);
+	hold_lctx(lctx);
+
+	/* syncache_add releases both pcbinfo and pcb locks */
+	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
+	INP_UNLOCK_ASSERT(inp);
+	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+
+	/*
+	 * If we replied during syncache_add (reply is RPL_DONE), good.
+	 * Otherwise (reply is unchanged - RPL_OK) it's no longer ok to reply.
+	 * The mbuf will stick around as long as the entry is in the syncache.
+	 * The kernel is free to retry syncache_respond but we'll ignore it due
+	 * to RPL_DONT.
+	 */
+	if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONT)) {
+
+		INP_WLOCK(inp);
+		if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+			/* listener closed.  synqe must have been aborted. */
+			KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
+			    ("%s: listener %p closed but synqe %p not aborted",
+			    __func__, inp, synqe));
+
+			CTR5(KTR_CXGB,
+			    "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
+			    __func__, stid, tid, lctx, synqe);
+			INP_WUNLOCK(inp);
+			release_synqe(synqe);
+			return (__LINE__);
+		}
+
+		KASSERT(!(synqe->flags & TP_ABORT_SHUTDOWN),
+		    ("%s: synqe %p aborted, but listener %p not dropped.",
+		    __func__, synqe, inp));
+
+		TAILQ_REMOVE(&lctx->synq, synqe, link);
+		release_synqe(synqe);	/* removed from synq list */
+		inp = release_lctx(td, lctx);
+		if (inp)
+			INP_WUNLOCK(inp);
+
+		release_synqe(synqe);	/* about to exit function */
+		REJECT_PASS_ACCEPT();
+	}
+
+	KASSERT(synqe->reply == RPL_DONE,
+	    ("%s: reply %d", __func__, synqe->reply));
+
+	CTR3(KTR_CXGB, "%s: stid %u, tid %u, OK", __func__, stid, tid);
+	release_synqe(synqe);
+	return (0);
+
+reject:
+	CTR4(KTR_CXGB, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
+	    reject_reason);
+
+	if (synqe == NULL)
+		m_freem(m);
+	if (e)
+		l2t_release(td->l2t, e);
+	queue_tid_release(tod, tid);
+
+	return (0);
+}
+
+static void
+pass_establish_to_protohdrs(const struct cpl_pass_establish *cpl,
+    struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
+{
+	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
+
+	bzero(inc, sizeof(*inc));
+	inc->inc_faddr.s_addr = cpl->peer_ip;
+	inc->inc_laddr.s_addr = cpl->local_ip;
+	inc->inc_fport = cpl->peer_port;
+	inc->inc_lport = cpl->local_port;
+
+	bzero(th, sizeof(*th));
+	th->th_sport = cpl->peer_port;
+	th->th_dport = cpl->local_port;
+	th->th_flags = TH_ACK;
+	th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
+	th->th_ack = be32toh(cpl->snd_isn); /* ditto */
+
+	bzero(to, sizeof(*to));
+	if (G_TCPOPT_TSTAMP(tcp_opt))
+		to->to_flags |= TOF_TS;
+}
+
+/*
+ * Process a CPL_PASS_ESTABLISH message.  The T3 has already established a
+ * connection and we need to do the software side setup.
+ */
+static int
+do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct cpl_pass_establish *cpl = mtod(m, void *);
+	struct toedev *tod = &td->tod;
+	unsigned int tid = GET_TID(cpl);
+	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+	struct toepcb *toep;
+	struct socket *so;
+	struct listen_ctx *lctx = synqe->lctx;
+	struct inpcb *inp = lctx->inp;
+	struct tcpopt to;
+	struct tcphdr th;
+	struct in_conninfo inc;
+#ifdef KTR
+	int stid = G_PASS_OPEN_TID(ntohl(cpl->tos_tid));
+#endif
+
+	CTR5(KTR_CXGB, "%s: stid %u, tid %u, lctx %p, inp_flags 0x%x",
+	    __func__, stid, tid, lctx, inp->inp_flags);
+
+	KASSERT(qs->idx == synqe->qset,
+	    ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
+
+	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
+	INP_WLOCK(inp);
+
+	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+		/*
+		 * The listening socket has closed.  The TOM must have aborted
+		 * all the embryonic connections (including this one) that were
+		 * on the lctx's synq.  do_abort_rpl for the tid is responsible
+		 * for cleaning up.
+		 */
+		KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
+		    ("%s: listen socket dropped but tid %u not aborted.",
+		    __func__, tid));
+		INP_WUNLOCK(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		m_freem(m);
+		return (0);
+	}
+
+	pass_establish_to_protohdrs(cpl, &inc, &th, &to);
+
+	/* Lie in order to pass the checks in syncache_expand */
+	to.to_tsecr = synqe->ts;
+	th.th_ack = synqe->iss + 1;
+
+	toep = toepcb_alloc(tod);
+	if (toep == NULL) {
+reset:
+		t3_send_reset_synqe(tod, synqe);
+		INP_WUNLOCK(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		m_freem(m);
+		return (0);
+	}
+	toep->tp_qset = qs->idx;
+	toep->tp_l2t = synqe->e;
+	toep->tp_tid = tid;
+	toep->tp_rx_credits = synqe->rx_credits;
+
+	synqe->toep = toep;
+	synqe->cpl = cpl;
+
+	so = inp->inp_socket;
+	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
+		toepcb_free(toep);
+		goto reset;
+	}
+
+	/* Remove the synq entry and release its reference on the lctx */
+	TAILQ_REMOVE(&lctx->synq, synqe, link);
+	inp = release_lctx(td, lctx);
+	if (inp)
+		INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+	release_synqe(synqe);
+
+	m_freem(m);
+	return (0);
+}
+
+void
+t3_init_listen_cpl_handlers(struct adapter *sc)
+{
+	t3_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
+	t3_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
+	t3_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
+	t3_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
+}
+
+/*
+ * Start a listening server by sending a passive open request to HW.
+ *
+ * Can't take adapter lock here and access to sc->flags, sc->open_device_map,
+ * sc->offload_map, if_capenable are all race prone.
+ */
+int
+t3_listen_start(struct toedev *tod, struct tcpcb *tp)
+{
+	struct tom_data *td = t3_tomdata(tod);
+	struct adapter *sc = tod->tod_softc;
+	struct port_info *pi;
+	struct inpcb *inp = tp->t_inpcb;
+	struct listen_ctx *lctx;
+	int i;
+
+	INP_WLOCK_ASSERT(inp);
+
+	if ((inp->inp_vflag & INP_IPV4) == 0)
+		return (0);
+
+#ifdef notyet
+	ADAPTER_LOCK(sc);
+	if (IS_BUSY(sc)) {
+		log(LOG_ERR, "%s: listen request ignored, %s is busy",
+		    __func__, device_get_nameunit(sc->dev));
+		goto done;
+	}
+
+	KASSERT(sc->flags & TOM_INIT_DONE,
+	    ("%s: TOM not initialized", __func__));
+#endif
+
+	if ((sc->open_device_map & sc->offload_map) == 0)
+		goto done;	/* no port that's UP with IFCAP_TOE enabled */
+
+	/*
+	 * Find a running port with IFCAP_TOE4.  We'll use the first such port's
+	 * queues to send the passive open and receive the reply to it.
+	 *
+	 * XXX: need a way to mark an port in use by offload.  if_cxgbe should
+	 * then reject any attempt to bring down such a port (and maybe reject
+	 * attempts to disable IFCAP_TOE on that port too?).
+	 */
+	for_each_port(sc, i) {
+		if (isset(&sc->open_device_map, i) &&
+		    sc->port[i].ifp->if_capenable & IFCAP_TOE4)
+				break;
+	}
+	KASSERT(i < sc->params.nports,
+	    ("%s: no running port with TOE capability enabled.", __func__));
+	pi = &sc->port[i];
+
+	if (listen_hash_find(td, inp) != NULL)
+		goto done;	/* already setup */
+
+	lctx = alloc_lctx(td, inp, pi->first_qset);
+	if (lctx == NULL) {
+		log(LOG_ERR,
+		    "%s: listen request ignored, %s couldn't allocate lctx\n",
+		    __func__, device_get_nameunit(sc->dev));
+		goto done;
+	}
+	listen_hash_add(td, lctx);
+
+	CTR5(KTR_CXGB, "%s: stid %u (%s), lctx %p, inp %p", __func__,
+	    lctx->stid, tcpstates[tp->t_state], lctx, inp);
+
+	if (create_server(sc, lctx) != 0) {
+		log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
+		    device_get_nameunit(sc->dev));
+		(void) listen_hash_del(td, inp);
+		inp = release_lctx(td, lctx);
+		/* can't be freed, host stack has a reference */
+		KASSERT(inp != NULL, ("%s: inp freed", __func__));
+		goto done;
+	}
+	lctx->flags |= LCTX_RPL_PENDING;
+done:
+#ifdef notyet
+	ADAPTER_UNLOCK(sc);
+#endif
+	return (0);
 }
 
 /*
  * Stop a listening server by sending a close_listsvr request to HW.
  * The server TID is freed when we get the reply.
  */
+int
+t3_listen_stop(struct toedev *tod, struct tcpcb *tp)
+{
+	struct listen_ctx *lctx;
+	struct adapter *sc = tod->tod_softc;
+	struct tom_data *td = t3_tomdata(tod);
+	struct inpcb *inp = tp->t_inpcb;
+	struct synq_entry *synqe;
+
+	INP_WLOCK_ASSERT(inp);
+
+	lctx = listen_hash_del(td, inp);
+	if (lctx == NULL)
+		return (ENOENT);	/* no hardware listener for this inp */
+
+	CTR4(KTR_CXGB, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
+	    lctx, lctx->flags);
+
+	/*
+	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
+	 * arrive and clean up when it does.
+	 */
+	if (lctx->flags & LCTX_RPL_PENDING) {
+		KASSERT(TAILQ_EMPTY(&lctx->synq),
+		    ("%s: synq not empty.", __func__));
+		return (EINPROGRESS);
+	}
+
+	/*
+	 * The host stack will abort all the connections on the listening
+	 * socket's so_comp.  It doesn't know about the connections on the synq
+	 * so we need to take care of those.
+	 */
+	TAILQ_FOREACH(synqe, &lctx->synq, link) {
+		KASSERT(synqe->lctx == lctx, ("%s: synq corrupt", __func__));
+		t3_send_reset_synqe(tod, synqe);
+	}
+
+	destroy_server(sc, lctx);
+	return (0);
+}
+
 void
-t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+t3_syncache_added(struct toedev *tod __unused, void *arg)
 {
+	struct synq_entry *synqe = arg;
+
+	hold_synqe(synqe);
+}
+
+void
+t3_syncache_removed(struct toedev *tod __unused, void *arg)
+{
+	struct synq_entry *synqe = arg;
+
+	release_synqe(synqe);
+}
+
+/* XXX */
+extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+
+int
+t3_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
+{
+	struct adapter *sc = tod->tod_softc;
+	struct synq_entry *synqe = arg;
+	struct l2t_entry *e = synqe->e;
+	struct ip *ip = mtod(m, struct ip *);
+	struct tcphdr *th = (void *)(ip + 1);
+	struct cpl_pass_accept_rpl *rpl;
+	struct mbuf *r;
+	struct listen_ctx *lctx = synqe->lctx;
+	struct tcpopt to;
+	int mtu_idx, cpu_idx;
+
+	/*
+	 * The first time we run it's during the call to syncache_add.  That's
+	 * the only one we care about.
+	 */
+	if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONE) == 0)
+		goto done;	/* reply to the CPL only if it's ok to do so */
+
+	r = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, rpl);
+	if (r == NULL)
+		goto done;
+
+	/*
+	 * Use only the provided mbuf (with ip and tcp headers) and what's in
+	 * synqe.  Avoid looking at the listening socket (lctx->inp) here.
+	 *
+	 * XXX: if the incoming SYN had the TCP timestamp option but the kernel
+	 * decides it doesn't want to use TCP timestamps we have no way of
+	 * relaying this info to the chip on a per-tid basis (all we have is a
+	 * global knob).
+	 */
+	bzero(&to, sizeof(to));
+	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
+	    TO_SYN);
+
+	/* stash them for later */
+	synqe->iss = be32toh(th->th_seq);
+	synqe->ts = to.to_tsval;
+
+	mtu_idx = find_best_mtu_idx(sc, NULL, to.to_mss);
+	cpu_idx = sc->rrss_map[synqe->qset];
+
+	rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	rpl->wr.wrh_lo = 0;
+	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, synqe->tid));
+	rpl->opt2 = calc_opt2(cpu_idx);
+	rpl->rsvd = rpl->opt2;		/* workaround for HW bug */
+	rpl->peer_ip = ip->ip_dst.s_addr;
+	rpl->opt0h = synqe->opt0h |
+	    calc_opt0h(NULL, mtu_idx, to.to_wscale, NULL);
+	rpl->opt0l_status = htobe32(CPL_PASS_OPEN_ACCEPT) |
+	    calc_opt0l(NULL, synqe->rx_credits);
+
+	l2t_send(sc, r, e);
+done:
+	m_freem(m);
+	return (0);
+}
+
+int
+do_abort_req_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	const struct cpl_abort_req_rss *req = mtod(m, void *);
+	unsigned int tid = GET_TID(req);
+	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+	struct listen_ctx *lctx = synqe->lctx;
+	struct inpcb *inp = lctx->inp;
+
+	KASSERT(synqe->flags & TP_IS_A_SYNQ_ENTRY,
+	    ("%s: !SYNQ_ENTRY", __func__));
+
+	CTR6(KTR_CXGB, "%s: tid %u, synqe %p (%x), lctx %p, status %d",
+	    __func__, tid, synqe, synqe->flags, synqe->lctx, req->status);
+
+	INP_WLOCK(inp);
+
+	if (!(synqe->flags & TP_ABORT_REQ_RCVD)) {
+		synqe->flags |= TP_ABORT_REQ_RCVD;
+		synqe->flags |= TP_ABORT_SHUTDOWN;
+		INP_WUNLOCK(inp);
+		m_freem(m);
+		return (0);
+	}
+	synqe->flags &= ~TP_ABORT_REQ_RCVD;
+
+	/*
+	 * If we'd sent a reset on this synqe, we'll ignore this and clean up in
+	 * the T3's reply to our reset instead.
+	 */
+	if (synqe->flags & TP_ABORT_RPL_PENDING) {
+		synqe->flags |= TP_ABORT_RPL_SENT;
+		INP_WUNLOCK(inp);
+	} else {
+		TAILQ_REMOVE(&lctx->synq, synqe, link);
+		inp = release_lctx(td, lctx);
+		if (inp)
+			INP_WUNLOCK(inp);
+		release_tid(tod, tid, qs->idx);
+		l2t_release(td->l2t, synqe->e);
+		release_synqe(synqe);
+	}
+
+	send_abort_rpl(tod, tid, qs->idx);
+	m_freem(m);
+	return (0);
+}
+
+int
+do_abort_rpl_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+	struct adapter *sc = qs->adap;
+	struct tom_data *td = sc->tom_softc;
+	struct toedev *tod = &td->tod;
+	const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
+	unsigned int tid = GET_TID(rpl);
+	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+	struct listen_ctx *lctx = synqe->lctx;
+	struct inpcb *inp = lctx->inp;
+
+	CTR3(KTR_CXGB, "%s: tid %d, synqe %p, status %d", tid, synqe,
+	    rpl->status);
+
+	INP_WLOCK(inp);
+
+	if (synqe->flags & TP_ABORT_RPL_PENDING) {
+		if (!(synqe->flags & TP_ABORT_RPL_RCVD)) {
+			synqe->flags |= TP_ABORT_RPL_RCVD;
+			INP_WUNLOCK(inp);
+		} else {
+			synqe->flags &= ~TP_ABORT_RPL_RCVD;
+			synqe->flags &= TP_ABORT_RPL_PENDING;
+
+			TAILQ_REMOVE(&lctx->synq, synqe, link);
+			inp = release_lctx(td, lctx);
+			if (inp)
+				INP_WUNLOCK(inp);
+			release_tid(tod, tid, qs->idx);
+			l2t_release(td->l2t, synqe->e);
+			release_synqe(synqe);
+		}
+	}
+
+	m_freem(m);
+	return (0);
+}
+
+static void
+t3_send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
+{
+	struct cpl_abort_req *req;
+	unsigned int tid = synqe->tid;
+	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
-	struct cpl_close_listserv_req *req;
-	struct listen_ctx *lctx;
-	int stid = listen_hash_del(TOM_DATA(dev), so);
-	
-	if (stid < 0)
+#ifdef INVARIANTS
+	struct listen_ctx *lctx = synqe->lctx;
+	struct inpcb *inp = lctx->inp;
+#endif
+
+	INP_WLOCK_ASSERT(inp);
+
+	CTR4(KTR_CXGB, "%s: tid %d, synqe %p (%x)", __func__, tid, synqe,
+	    synqe->flags);
+
+	if (synqe->flags & TP_ABORT_SHUTDOWN)
 		return;
 
-	lctx = cxgb_get_lctx(cdev, stid);
-	/*
-	 * Do this early so embryonic connections are marked as being aborted
-	 * while the stid is still open.  This ensures pass_establish messages
-	 * that arrive while we are closing the server will be able to locate
-	 * the listening socket.
-	 */
-	t3_reset_synq(lctx);
+	synqe->flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
 
-	/* Send the close ASAP to stop further passive opens */
-	m = m_gethdr(M_NOWAIT, MT_DATA);
-	if (m == NULL) {
-		/*
-		 * XXX allocate from lowmem cache
-		 */
-	}
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
+	m = M_GETHDR_OFLD(synqe->qset, CPL_PRIORITY_DATA, req);
+	if (m == NULL)
+		CXGB_UNIMPLEMENTED();
 
-	req = mtod(m, struct cpl_close_listserv_req *);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
-	req->cpu_idx = 0;
-	m_set_priority(m, CPL_PRIORITY_LISTEN);
-	cxgb_ofld_send(cdev, m);
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+	req->wr.wrh_lo = htonl(V_WR_TID(tid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+	req->rsvd0 = 0;
+	req->rsvd1 = !(synqe->flags & TP_DATASENT);
+	req->cmd = CPL_ABORT_SEND_RST;
 
-	t3_disconnect_acceptq(so);
+	l2t_send(sc, m, synqe->e);
 }
+
+void
+t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
+{
+	struct adapter *sc = tod->tod_softc;
+	struct tom_data *td = sc->tom_softc;
+	struct synq_entry *synqe = arg;
+#ifdef INVARIANTS
+	struct inpcb *inp = sotoinpcb(so);
+#endif
+	struct cpl_pass_establish *cpl = synqe->cpl;
+	struct toepcb *toep = synqe->toep;
+
+	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+	INP_WLOCK_ASSERT(inp);
+
+	offload_socket(so, toep);
+	make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
+	update_tid(td, toep, synqe->tid);
+}
+#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
-
-#ifndef T3_DDP_H
-#define T3_DDP_H
-
-/* Should be 1 or 2 indicating single or double kernel buffers. */
-#define NUM_DDP_KBUF 2
-
-/* min receive window for a connection to be considered for DDP */
-#define MIN_DDP_RCV_WIN (48 << 10)
-
-/* amount of Rx window not available to DDP to avoid window exhaustion */
-#define DDP_RSVD_WIN (16 << 10)
-
-/* # of sentinel invalid page pods at the end of a group of valid page pods */
-#define NUM_SENTINEL_PPODS 0
-
-/* # of pages a pagepod can hold without needing another pagepod */
-#define PPOD_PAGES 4
-
-/* page pods are allocated in groups of this size (must be power of 2) */
-#define PPOD_CLUSTER_SIZE 16
-
-/* for each TID we reserve this many page pods up front */
-#define RSVD_PPODS_PER_TID 1
-
-struct pagepod {
-	uint32_t pp_vld_tid;
-	uint32_t pp_pgsz_tag_color;
-	uint32_t pp_max_offset;
-	uint32_t pp_page_offset;
-	uint64_t pp_rsvd;
-	uint64_t pp_addr[5];
-};
-
-#define PPOD_SIZE sizeof(struct pagepod)
-
-#define S_PPOD_TID    0
-#define M_PPOD_TID    0xFFFFFF
-#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
-
-#define S_PPOD_VALID    24
-#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
-#define F_PPOD_VALID    V_PPOD_VALID(1U)
-
-#define S_PPOD_COLOR    0
-#define M_PPOD_COLOR    0x3F
-#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
-
-#define S_PPOD_TAG    6
-#define M_PPOD_TAG    0xFFFFFF
-#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
-
-#define S_PPOD_PGSZ    30
-#define M_PPOD_PGSZ    0x3
-#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <machine/bus.h>
-
-/* DDP gather lists can specify an offset only for the first page. */
-struct ddp_gather_list {
-	unsigned int	dgl_length;
-	unsigned int	dgl_offset;
-	unsigned int	dgl_nelem;
-	vm_page_t   	dgl_pages[0];
-};
-
-struct ddp_buf_state {
-	unsigned int cur_offset;     /* offset of latest DDP notification */
-	unsigned int flags;
-	struct ddp_gather_list *gl;
-};
-
-struct ddp_state {
-	struct ddp_buf_state buf_state[2];   /* per buffer state */
-	int cur_buf;
-	unsigned short kbuf_noinval;
-	unsigned short kbuf_idx;        /* which HW buffer is used for kbuf */
-	struct ddp_gather_list *ubuf;
-	int user_ddp_pending;
-	unsigned int ubuf_nppods;       /* # of page pods for buffer 1 */
-	unsigned int ubuf_tag;
-	unsigned int ubuf_ddp_ready;
-	int cancel_ubuf;
-	int get_tcb_count;
-	unsigned int kbuf_posted;
-	unsigned int kbuf_nppods[NUM_DDP_KBUF];
-	unsigned int kbuf_tag[NUM_DDP_KBUF];
-	struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
-};
-
-/* buf_state flags */
-enum {
-	DDP_BF_NOINVAL = 1 << 0,   /* buffer is set to NO_INVALIDATE */
-	DDP_BF_NOCOPY  = 1 << 1,   /* DDP to final dest, no copy needed */
-	DDP_BF_NOFLIP  = 1 << 2,   /* buffer flips after GET_TCB_RPL */
-	DDP_BF_PSH     = 1 << 3,   /* set in skb->flags if the a DDP was 
-	                              completed with a segment having the
-				      PSH flag set */
-	DDP_BF_NODATA  = 1 << 4,   /* buffer completed before filling */ 
-};
-
-#include <ulp/tom/cxgb_toepcb.h>
-struct sockbuf;
-
-/*
- * Returns 1 if a UBUF DMA buffer might be active.
- */
-static inline int
-t3_ddp_ubuf_pending(struct toepcb *toep)
-{
-	struct ddp_state *p = &toep->tp_ddp_state;
-
-	/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
-	 * but DDP_STATE() is only valid if the connection actually enabled
-	 * DDP.
-	 */
-	if (p->kbuf[0] == NULL)
-		return (0);
-
-	return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) || 
-	       (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
-}
-
-int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
-		   unsigned int nppods, unsigned int tag, unsigned int maxoff,
-		   unsigned int pg_off, unsigned int color);
-int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
-void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
-void t3_free_ddp_gl(struct ddp_gather_list *gl);
-int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len);
-//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
-void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock);
-int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock,
-		 int rcv_flags, int modulate, int post_kbuf);
-void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv);
-int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
-    const struct uio *uio, int nonblock,
-    int rcv_flags, int modulate, int post_kbuf);
-int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock);
-void t3_cleanup_ddp(struct toepcb *toep);
-void t3_release_ddp_resources(struct toepcb *toep);
-void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);
-void t3_overlay_ddpbuf(struct toepcb *, unsigned int bufidx, unsigned int tag0,
-		       unsigned int tag1, unsigned int len);
-void t3_setup_ddpbufs(struct toepcb *, unsigned int len0, unsigned int offset0,
-		      unsigned int len1, unsigned int offset1,
-		      uint64_t ddp_flags, uint64_t flag_mask, int modulate);
-#endif  /* T3_DDP_H */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tcp.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tcp.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-#ifndef CXGB_TCP_H_
-#define CXGB_TCP_H_
-#ifdef TCP_USRREQS_OVERLOAD
-struct tcpcb *cxgb_tcp_drop(struct tcpcb *tp, int errno);
-#else
-#define cxgb_tcp_drop	tcp_drop
-#endif
-void cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip);
-struct tcpcb *cxgb_tcp_close(struct tcpcb *tp);
-
-extern struct pr_usrreqs cxgb_tcp_usrreqs;
-#ifdef INET6
-extern struct pr_usrreqs cxgb_tcp6_usrreqs;
-#endif
-
-#include <sys/sysctl.h>
-SYSCTL_DECL(_net_inet_tcp_cxgb);
-#endif  /* CXGB_TCP_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * grab bag of accessor routines that will either be moved to netinet
- * or removed
- */
-
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/mbuf.h>
-#include <sys/sockopt.h>
-#include <sys/sockbuf.h>
-
-#include <sys/socket.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_pcb.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_offload.h>
-#include <netinet/tcp_syncache.h>
-#include <netinet/toedev.h>
-
-#include <ulp/tom/cxgb_tcp_offload.h>
-
-
-/*
- * This file contains code as a short-term staging area before it is moved in 
- * to sys/netinet/tcp_offload.c
- */
-
-void
-sockbuf_lock(struct sockbuf *sb)
-{
-
-	SOCKBUF_LOCK(sb);
-}
-
-void
-sockbuf_lock_assert(struct sockbuf *sb)
-{
-
-	SOCKBUF_LOCK_ASSERT(sb);
-}
-
-void
-sockbuf_unlock(struct sockbuf *sb)
-{
-
-	SOCKBUF_UNLOCK(sb);
-}
-
-int
-sockbuf_sbspace(struct sockbuf *sb)
-{
-
-	return (sbspace(sb));
-}
-
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef CXGB_TCP_OFFLOAD_H_
-#define CXGB_TCP_OFFLOAD_H_
-
-struct sockbuf;
-
-void sockbuf_lock(struct sockbuf *);
-void sockbuf_lock_assert(struct sockbuf *);
-void sockbuf_unlock(struct sockbuf *);
-int  sockbuf_sbspace(struct sockbuf *);
-
-
-#endif /* CXGB_TCP_OFFLOAD_H_ */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h	Wed Jul 25 17:04:43 2012 +0300
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2007-2008, Chelsio Inc.
+ * Copyright (c) 2007-2009, Chelsio Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,7 +24,7 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h 237263 2012-06-19 07:34:13Z np $
  */
 #ifndef CXGB_TOEPCB_H_
 #define CXGB_TOEPCB_H_
@@ -32,88 +32,63 @@
 #include <sys/condvar.h>
 #include <sys/mbufq.h>
 
+#define TP_DATASENT         	(1 << 0)
+#define TP_TX_WAIT_IDLE      	(1 << 1)
+#define TP_FIN_SENT          	(1 << 2)
+#define TP_ABORT_RPL_PENDING 	(1 << 3)
+#define TP_ABORT_SHUTDOWN    	(1 << 4)
+#define TP_ABORT_RPL_RCVD    	(1 << 5)
+#define TP_ABORT_REQ_RCVD    	(1 << 6)
+#define TP_ATTACHED	    	(1 << 7)
+#define TP_CPL_DONE		(1 << 8)
+#define TP_IS_A_SYNQ_ENTRY	(1 << 9)
+#define TP_ABORT_RPL_SENT	(1 << 10)
+#define TP_SEND_FIN          	(1 << 11)
+
 struct toepcb {
-	struct toedev 		*tp_toedev;
+	TAILQ_ENTRY(toepcb) link; /* toep_list */
+	int 			tp_flags;
+	struct toedev 		*tp_tod;
 	struct l2t_entry 	*tp_l2t;
-	unsigned int 		tp_tid;
+	int			tp_tid;
 	int 			tp_wr_max;
 	int 			tp_wr_avail;
 	int 			tp_wr_unacked;
 	int 			tp_delack_mode;
-	int 			tp_mtu_idx;
 	int 			tp_ulp_mode;
-	int 			tp_qset_idx;
-	int 			tp_mss_clamp;
 	int 			tp_qset;
-	int 			tp_flags;
-	int 			tp_enqueued_bytes;
-	int 			tp_page_count;
-	int 			tp_state;
+	int 			tp_enqueued;
+	int 			tp_rx_credits;
 
-	tcp_seq 		tp_iss;
-	tcp_seq 		tp_delack_seq;
-	tcp_seq 		tp_rcv_wup;
-	tcp_seq 		tp_copied_seq;
-	uint64_t 		tp_write_seq;
+	struct inpcb 		*tp_inp;
+	struct mbuf		*tp_m_last;
 
-	volatile int 		tp_refcount;
-	vm_page_t 		*tp_pages;
-	
-	struct tcpcb 		*tp_tp;
-	struct mbuf  		*tp_m_last;
-	bus_dma_tag_t		tp_tx_dmat;
-	bus_dma_tag_t		tp_rx_dmat;
-	bus_dmamap_t		tp_dmamap;
-
-	LIST_ENTRY(toepcb) 	synq_entry;
 	struct mbuf_head 	wr_list;
 	struct mbuf_head 	out_of_order_queue;
-	struct ddp_state 	tp_ddp_state;
-	struct cv		tp_cv;
-			   
 };
 
 static inline void
 reset_wr_list(struct toepcb *toep)
 {
-
 	mbufq_init(&toep->wr_list);
 }
 
 static inline void
-purge_wr_queue(struct toepcb *toep)
-{
-	struct mbuf *m;
-	
-	while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) 
-		m_freem(m);
-}
-
-static inline void
 enqueue_wr(struct toepcb *toep, struct mbuf *m)
 {
-
 	mbufq_tail(&toep->wr_list, m);
 }
 
 static inline struct mbuf *
 peek_wr(const struct toepcb *toep)
 {
-
 	return (mbufq_peek(&toep->wr_list));
 }
 
 static inline struct mbuf *
 dequeue_wr(struct toepcb *toep)
 {
-
 	return (mbufq_dequeue(&toep->wr_list));
 }
 
-#define wr_queue_walk(toep, m) \
-	for (m = peek_wr(toep); m; m = m->m_nextpkt)
-
-
-
 #endif
-
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tom.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tom.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_tom.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,261 +1,106 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_tom.c 237263 2012-06-19 07:34:13Z np $");
+
+#include "opt_inet.h"
 
 #include <sys/param.h>
-#include <sys/systm.h>
+#include <sys/types.h>
 #include <sys/kernel.h>
-#include <sys/fcntl.h>
-#include <sys/ktr.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/eventhandler.h>
-#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
 #include <sys/module.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
 #include <sys/socket.h>
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
 #include <sys/taskqueue.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/toecore.h>
 
-#include <net/if.h>
-#include <net/route.h>
+#ifdef TCP_OFFLOAD
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_toepcb.h"
 
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
+MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio T3 Offload services");
 
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
+/* Module ops */
+static int t3_tom_mod_load(void);
+static int t3_tom_mod_unload(void);
+static int t3_tom_modevent(module_t, int, void *);
 
-#include <netinet/in_pcb.h>
+/* ULD ops and helpers */
+static int t3_tom_activate(struct adapter *);
+static int t3_tom_deactivate(struct adapter *);
 
-#include <ulp/tom/cxgb_tcp_offload.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_offload.h>
-#include <netinet/tcp_fsm.h>
+static int alloc_tid_tabs(struct tid_info *, u_int, u_int, u_int, u_int, u_int);
+static void free_tid_tabs(struct tid_info *);
+static int write_smt_entry(struct adapter *, int);
+static void free_tom_data(struct tom_data *);
 
-#include <cxgb_include.h>
-
-#include <net/if_vlan_var.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_tcb.h>
-#include <cxgb_include.h>
-#include <common/cxgb_ctl_defs.h>
-#include <common/cxgb_t3_cpl.h>
-#include <cxgb_offload.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-TAILQ_HEAD(, adapter) adapter_list;
-static struct rwlock adapter_list_lock;
-
-static TAILQ_HEAD(, tom_data) cxgb_list;
-static struct mtx cxgb_list_lock;
-static const unsigned int MAX_ATIDS = 64 * 1024;
-static const unsigned int ATID_BASE = 0x100000;
-
-static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
-static void cxgb_register_listeners(void);
-static void t3c_tom_add(struct t3cdev *cdev);
-
-/*
- * Handlers for each CPL opcode
- */
-static cxgb_cpl_handler_func tom_cpl_handlers[256];
-
-
-static eventhandler_tag listen_tag;
-
-static struct offload_id t3_toe_id_tab[] = {
-	{ TOE_ID_CHELSIO_T3, 0 },
-	{ TOE_ID_CHELSIO_T3B, 0 },
-	{ TOE_ID_CHELSIO_T3C, 0 },
-	{ 0 }
+static struct uld_info tom_uld_info = {
+	.uld_id = ULD_TOM,
+	.activate = t3_tom_activate,
+	.deactivate = t3_tom_deactivate,
 };
 
-static struct tom_info t3_tom_info = {
-	.ti_attach = t3_toe_attach,
-	.ti_id_table = t3_toe_id_tab,
-	.ti_name = "Chelsio-T3"
-};
-
-struct cxgb_client t3c_tom_client = {
-	.name = "tom_cxgb3",
-	.add = t3c_tom_add,
-	.remove = NULL,
-	.handlers = tom_cpl_handlers,
-	.redirect = NULL
-};
-
-void
-cxgb_log_tcb(struct adapter *sc, unsigned int tid)
-{
-
-	char buf[TCB_SIZE];
-	uint64_t *tcb = (uint64_t *)buf;
-	int i, error;
-	struct mc7 *mem = &sc->cm;
-
-	error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
-	if (error)
-		printf("cxgb_tcb_log failed\n");
-
-
-	CTR1(KTR_CXGB, "TCB tid=%u", tid);
-	for (i = 0; i < TCB_SIZE / 32; i++) {
-
-		CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
-		    i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
-		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
-
-		tcb += 2;
-		CTR4(KTR_CXGB, "   %08x %08x %08x %08x",
-		    (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
-		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
-		tcb += 2;
-	}
-}
-
-/*
- * Add an skb to the deferred skb queue for processing from process context.
- */
-void
-t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
-{
-	struct tom_data *td = TOM_DATA(dev);
-
-	m_set_handler(m, handler);
-	mtx_lock(&td->deferq.lock);
-	
-	mbufq_tail(&td->deferq, m);
-	if (mbufq_len(&td->deferq) == 1)
-		taskqueue_enqueue(td->tq, &td->deferq_task);
-	mtx_lock(&td->deferq.lock);
-}
-
 struct toepcb *
-toepcb_alloc(void)
+toepcb_alloc(struct toedev *tod)
 {
 	struct toepcb *toep;
-	
-	toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
-	
+
+	toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT | M_ZERO);
 	if (toep == NULL)
 		return (NULL);
 
-	toepcb_init(toep);
+	toep->tp_tod = tod;
+	toep->tp_wr_max = toep->tp_wr_avail = 15;
+	toep->tp_wr_unacked = 0;
+	toep->tp_delack_mode = 0;
+
 	return (toep);
 }
 
 void
-toepcb_init(struct toepcb *toep)
+toepcb_free(struct toepcb *toep)
 {
-	toep->tp_refcount = 1;
-	cv_init(&toep->tp_cv, "toep cv");
+	free(toep, M_CXGB);
 }
 
-void
-toepcb_hold(struct toepcb *toep)
-{
-	atomic_add_acq_int(&toep->tp_refcount, 1);
-}
-
-void
-toepcb_release(struct toepcb *toep)
-{
-	if (toep->tp_refcount == 1) {
-		free(toep, M_CXGB);
-		return;
-	}
-	atomic_add_acq_int(&toep->tp_refcount, -1);
-}
-
-
-/*
- * Add a T3 offload device to the list of devices we are managing.
- */
-static void
-t3cdev_add(struct tom_data *t)
-{	
-	mtx_lock(&cxgb_list_lock);
-	TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
-	mtx_unlock(&cxgb_list_lock);
-}
-
-static inline int
-cdev2type(struct t3cdev *cdev)
-{
-	int type = 0;
-
-	switch (cdev->type) {
-	case T3A:
-		type = TOE_ID_CHELSIO_T3;
-		break;
-	case T3B:
-		type = TOE_ID_CHELSIO_T3B;
-		break;
-	case T3C:
-		type = TOE_ID_CHELSIO_T3C;
-		break;
-	}
-	return (type);
-}
-
-/*
- * Allocate and initialize the TID tables.  Returns 0 on success.
- */
 static int
-init_tid_tabs(struct tid_info *t, unsigned int ntids,
-			 unsigned int natids, unsigned int nstids,
-			 unsigned int atid_base, unsigned int stid_base)
+alloc_tid_tabs(struct tid_info *t, u_int ntids, u_int natids, u_int nstids,
+    u_int atid_base, u_int stid_base)
 {
 	unsigned long size = ntids * sizeof(*t->tid_tab) +
 	    natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
 
-	t->tid_tab = cxgb_alloc_mem(size);
+	t->tid_tab = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
 	if (!t->tid_tab)
 		return (ENOMEM);
 
@@ -270,8 +115,8 @@
 	t->afree = NULL;
 	t->stids_in_use = t->atids_in_use = 0;
 	t->tids_in_use = 0;
-	mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
-	mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);
+	mtx_init(&t->stid_lock, "stid", NULL, MTX_DEF);
+	mtx_init(&t->atid_lock, "atid", NULL, MTX_DEF);
 
 	/*
 	 * Setup the free lists for stid_tab and atid_tab.
@@ -286,1240 +131,266 @@
 			t->atid_tab[natids - 1].next = &t->atid_tab[natids];
 		t->afree = t->atid_tab;
 	}
-	return 0;
-}
-
-static void
-free_tid_maps(struct tid_info *t)
-{
-	mtx_destroy(&t->stid_lock);
-	mtx_destroy(&t->atid_lock);
-	cxgb_free_mem(t->tid_tab);
-}
-
-static inline void
-add_adapter(adapter_t *adap)
-{
-	rw_wlock(&adapter_list_lock);
-	TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
-	rw_wunlock(&adapter_list_lock);
-}
-
-static inline void
-remove_adapter(adapter_t *adap)
-{
-	rw_wlock(&adapter_list_lock);
-	TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
-	rw_wunlock(&adapter_list_lock);
-}
-
-/*
- * Populate a TID_RELEASE WR.  The mbuf must be already propely sized.
- */
-static inline void
-mk_tid_release(struct mbuf *m, unsigned int tid)
-{
-	struct cpl_tid_release *req;
-
-	m_set_priority(m, CPL_PRIORITY_SETUP);
-	req = mtod(m, struct cpl_tid_release *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
-}
-
-static void
-t3_process_tid_release_list(void *data, int pending)
-{
-	struct mbuf *m;
-	struct t3cdev *tdev = data;
-	struct t3c_data *td = T3C_DATA (tdev);
-
-	mtx_lock(&td->tid_release_lock);
-	while (td->tid_release_list) {
-		struct toe_tid_entry *p = td->tid_release_list;
-
-		td->tid_release_list = (struct toe_tid_entry *)p->ctx;
-		mtx_unlock(&td->tid_release_lock);
-		m = m_get(M_WAIT, MT_DATA);
-		mk_tid_release(m, p - td->tid_maps.tid_tab);
-		cxgb_ofld_send(tdev, m);
-		p->ctx = NULL;
-		mtx_lock(&td->tid_release_lock);
-	}
-	mtx_unlock(&td->tid_release_lock);
-}
-
-int
-cxgb_offload_activate(struct adapter *adapter)
-{
-	struct t3cdev *dev = &adapter->tdev;
-	int natids, err;
-	struct t3c_data *t;
-	struct tid_range stid_range, tid_range;
-	struct mtutab mtutab;
-	unsigned int l2t_capacity;
-
-	t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
-	if (!t)
-		return (ENOMEM);
-	dev->adapter = adapter;
-
-	err = (EOPNOTSUPP);
-	if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
-	    dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
-	    dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
-	    dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
-	    dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
-	    dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
-		device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);
-		goto out_free;
-	}
-      
-	err = (ENOMEM);
-	L2DATA(dev) = t3_init_l2t(l2t_capacity);
-	if (!L2DATA(dev)) {
-		device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);
-		goto out_free;
-	}
-	natids = min(tid_range.num / 2, MAX_ATIDS);
-	err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
-			    stid_range.num, ATID_BASE, stid_range.base);
-	if (err) {	
-		device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);
-		goto out_free_l2t;
-	}
-	
-	t->mtus = mtutab.mtus;
-	t->nmtus = mtutab.size;
-
-	TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
-	mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);
-	t->dev = dev;
-
-	T3C_DATA (dev) = t;
-	dev->recv = process_rx;
-	dev->arp_update = t3_l2t_update;
-	/* Register netevent handler once */
-	if (TAILQ_EMPTY(&adapter_list)) {
-#if defined(CONFIG_CHELSIO_T3_MODULE)
-		if (prepare_arp_with_t3core())
-			log(LOG_ERR, "Unable to set offload capabilities\n");
-#endif
-	}
-	CTR1(KTR_CXGB, "adding adapter %p", adapter); 
-	add_adapter(adapter);
-	device_printf(adapter->dev, "offload started\n");
-	adapter->flags |= CXGB_OFLD_INIT;
-	return (0);
-
-out_free_l2t:
-	t3_free_l2t(L2DATA(dev));
-	L2DATA(dev) = NULL;
-out_free:
-	free(t, M_CXGB);
-	return (err);
-}
-
-void
-cxgb_offload_deactivate(struct adapter *adapter)
-{
-	struct t3cdev *tdev = &adapter->tdev;
-	struct t3c_data *t = T3C_DATA(tdev);
-
-	printf("removing adapter %p\n", adapter);
-	remove_adapter(adapter);
-	if (TAILQ_EMPTY(&adapter_list)) {
-#if defined(CONFIG_CHELSIO_T3_MODULE)
-		restore_arp_sans_t3core();
-#endif
-	}
-	free_tid_maps(&t->tid_maps);
-	T3C_DATA(tdev) = NULL;
-	t3_free_l2t(L2DATA(tdev));
-	L2DATA(tdev) = NULL;
-	mtx_destroy(&t->tid_release_lock);
-	free(t, M_CXGB);
-}
-
-/*
- * Sends an sk_buff to a T3C driver after dealing with any active network taps.
- */
-int
-cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
-{
-	int r;
-
-	r = dev->send(dev, m);
-	return r;
-}
-
-static struct ifnet *
-get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
-{
-	int i;
-
-	for_each_port(adapter, i) {
-#ifdef notyet		
-		const struct vlan_group *grp;
-#endif		
-		const struct port_info *p = &adapter->port[i];
-		struct ifnet *ifp = p->ifp;
-
-		if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
-#ifdef notyet	
-			
-			if (vlan && vlan != EVL_VLID_MASK) {
-				grp = p->vlan_grp;
-				dev = grp ? grp->vlan_devices[vlan] : NULL;
-			} else
-				while (dev->master)
-					dev = dev->master;
-#endif			
-			return (ifp);
-		}
-	}
-	return (NULL);
-}
-
-static inline void
-failover_fixup(adapter_t *adapter, int port)
-{
-	if (adapter->params.rev == 0) {
-		struct ifnet *ifp = adapter->port[port].ifp;
-		struct cmac *mac = &adapter->port[port].mac;
-		if (!(ifp->if_flags & IFF_UP)) {
-			/* Failover triggered by the interface ifdown */
-			t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
-				     F_TXEN);
-			t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
-		} else {
-			/* Failover triggered by the interface link down */
-			t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
-			t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
-			t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
-				     F_RXEN);
-		}
-	}
-}
-
-static int
-cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
-{
-	int ret = 0;
-	struct ulp_iscsi_info *uiip = data;
-
-	switch (req) {
-	case ULP_ISCSI_GET_PARAMS:
-		uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
-		uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
-		uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
-		/*
-		 * On tx, the iscsi pdu has to be <= tx page size and has to
-		 * fit into the Tx PM FIFO.
-		 */
-		uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
-				     t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
-		/* on rx, the iscsi pdu has to be < rx page size and the
-		   whole pdu + cpl headers has to fit into one sge buffer */
-		/* also check the max rx data length programmed in TP */
-		uiip->max_rxsz = min(uiip->max_rxsz,
-				     ((t3_read_reg(adapter, A_TP_PARA_REG2))
-					>> S_MAXRXDATA) & M_MAXRXDATA);
-		break;
-	case ULP_ISCSI_SET_PARAMS:
-		t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
-		break;
-	default:
-		ret = (EOPNOTSUPP);
-	}
-	return ret;
-}
-
-/* Response queue used for RDMA events. */
-#define ASYNC_NOTIF_RSPQ 0
-
-static int
-cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
-{
-	int ret = 0;
-
-	switch (req) {
-	case RDMA_GET_PARAMS: {
-		struct rdma_info *req = data;
-
-		req->udbell_physbase = rman_get_start(adapter->udbs_res);
-		req->udbell_len = rman_get_size(adapter->udbs_res);
-		req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
-		req->tpt_top  = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
-		req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
-		req->pbl_top  = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
-		req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
-		req->rqt_top  = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
-		req->kdb_addr =  (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL);		break;
-	}
-	case RDMA_CQ_OP: {
-		struct rdma_cq_op *req = data;
-
-		/* may be called in any context */
-		mtx_lock_spin(&adapter->sge.reg_lock);
-		ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
-					req->credits);
-		mtx_unlock_spin(&adapter->sge.reg_lock);
-		break;
-	}
-	case RDMA_GET_MEM: {
-		struct ch_mem_range *t = data;
-		struct mc7 *mem;
-
-		if ((t->addr & 7) || (t->len & 7))
-			return (EINVAL);
-		if (t->mem_id == MEM_CM)
-			mem = &adapter->cm;
-		else if (t->mem_id == MEM_PMRX)
-			mem = &adapter->pmrx;
-		else if (t->mem_id == MEM_PMTX)
-			mem = &adapter->pmtx;
-		else
-			return (EINVAL);
-
-		ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
-		if (ret)
-			return (ret);
-		break;
-	}
-	case RDMA_CQ_SETUP: {
-		struct rdma_cq_setup *req = data;
-
-		mtx_lock_spin(&adapter->sge.reg_lock);
-		ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
-					  req->size, ASYNC_NOTIF_RSPQ,
-					  req->ovfl_mode, req->credits,
-					  req->credit_thres);
-		mtx_unlock_spin(&adapter->sge.reg_lock);
-		break;
-	}
-	case RDMA_CQ_DISABLE:
-		mtx_lock_spin(&adapter->sge.reg_lock);
-		ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
-		mtx_unlock_spin(&adapter->sge.reg_lock);
-		break;
-	case RDMA_CTRL_QP_SETUP: {
-		struct rdma_ctrlqp_setup *req = data;
-
-		mtx_lock_spin(&adapter->sge.reg_lock);
-		ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
-					 SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
-					 req->base_addr, req->size,
-					 FW_RI_TID_START, 1, 0);
-		mtx_unlock_spin(&adapter->sge.reg_lock);
-		break;
-	}
-	default:
-		ret = EOPNOTSUPP;
-	}
-	return (ret);
-}
-
-static int
-cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
-{
-	struct adapter *adapter = tdev2adap(tdev);
-	struct tid_range *tid;
-	struct mtutab *mtup;
-	struct iff_mac *iffmacp;
-	struct ddp_params *ddpp;
-	struct adap_ports *ports;
-	struct ofld_page_info *rx_page_info;
-	struct tp_params *tp = &adapter->params.tp;
-	int port;
-
-	switch (req) {
-	case GET_MAX_OUTSTANDING_WR:
-		*(unsigned int *)data = FW_WR_NUM;
-		break;
-	case GET_WR_LEN:
-		*(unsigned int *)data = WR_FLITS;
-		break;
-	case GET_TX_MAX_CHUNK:
-		*(unsigned int *)data = 1 << 20;  /* 1MB */
-		break;
-	case GET_TID_RANGE:
-		tid = data;
-		tid->num = t3_mc5_size(&adapter->mc5) -
-			adapter->params.mc5.nroutes -
-			adapter->params.mc5.nfilters -
-			adapter->params.mc5.nservers;
-		tid->base = 0;
-		break;
-	case GET_STID_RANGE:
-		tid = data;
-		tid->num = adapter->params.mc5.nservers;
-		tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
-			adapter->params.mc5.nfilters -
-			adapter->params.mc5.nroutes;
-		break;
-	case GET_L2T_CAPACITY:
-		*(unsigned int *)data = 2048;
-		break;
-	case GET_MTUS:
-		mtup = data;
-		mtup->size = NMTUS;
-		mtup->mtus = adapter->params.mtus;
-		break;
-	case GET_IFF_FROM_MAC:
-		iffmacp = data;
-		iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
-					  iffmacp->vlan_tag & EVL_VLID_MASK);
-		break;
-	case GET_DDP_PARAMS:
-		ddpp = data;
-		ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
-		ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
-		ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
-		break;
-	case GET_PORTS:
-		ports = data;
-		ports->nports   = adapter->params.nports;
-		for_each_port(adapter, port)
-			ports->lldevs[port] = adapter->port[port].ifp;
-		break;
-	case FAILOVER:
-		port = *(int *)data;
-		t3_port_failover(adapter, port);
-		failover_fixup(adapter, port);
-		break;
-	case FAILOVER_DONE:
-		port = *(int *)data;
-		t3_failover_done(adapter, port);
-		break;
-	case FAILOVER_CLEAR:
-		t3_failover_clear(adapter);
-		break;
-	case GET_RX_PAGE_INFO:
-		rx_page_info = data;
-		rx_page_info->page_size = tp->rx_pg_size;
-		rx_page_info->num = tp->rx_num_pgs;
-		break;
-	case ULP_ISCSI_GET_PARAMS:
-	case ULP_ISCSI_SET_PARAMS:
-		if (!offload_running(adapter))
-			return (EAGAIN);
-		return cxgb_ulp_iscsi_ctl(adapter, req, data);
-	case RDMA_GET_PARAMS:
-	case RDMA_CQ_OP:
-	case RDMA_CQ_SETUP:
-	case RDMA_CQ_DISABLE:
-	case RDMA_CTRL_QP_SETUP:
-	case RDMA_GET_MEM:
-		if (!offload_running(adapter))
-			return (EAGAIN);
-		return cxgb_rdma_ctl(adapter, req, data);
-	default:
-		return (EOPNOTSUPP);
-	}
-	return 0;
-}
-
-/*
- * Allocate a TOM data structure,
- * initialize its cpl_handlers
- * and register it as a T3C client
- */
-static void
-t3c_tom_add(struct t3cdev *cdev)
-{
-	int i;
-	unsigned int wr_len;
-	struct tom_data *t;
-	struct toedev *tdev;
-	struct adap_ports *port_info;
-
-	t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
-	if (t == NULL)
-		return;
-
-	cdev->send = t3_offload_tx;
-	cdev->ctl = cxgb_offload_ctl;
-	
-	if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
-		goto out_free_tom;
-
-	port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
-	if (!port_info)
-		goto out_free_tom;
-
-	if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
-		goto out_free_all;
-
-	t3_init_wr_tab(wr_len);
-	t->cdev = cdev;
-	t->client = &t3c_tom_client;
-
-	/* Register TCP offload device */
-	tdev = &t->tdev;
-	tdev->tod_ttid = cdev2type(cdev);
-	tdev->tod_lldev = cdev->lldev;
-	
-	if (register_toedev(tdev, "toe%d")) {
-		printf("unable to register offload device");
-		goto out_free_all;
-	}
-	TOM_DATA(tdev) = t;
-
-	for (i = 0; i < port_info->nports; i++) {
-		struct ifnet *ifp = port_info->lldevs[i];
-		TOEDEV(ifp) = tdev;
-
-		CTR1(KTR_TOM, "enabling toe on %p", ifp);
-		ifp->if_capabilities |= IFCAP_TOE4;
-		ifp->if_capenable |= IFCAP_TOE4;
-	}
-	t->ports = port_info;
-
-	/* Add device to the list of offload devices */
-	t3cdev_add(t);
-
-	/* Activate TCP offload device */
-	cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);
-
-	activate_offload(tdev);
-	cxgb_register_listeners();
-	return;
-
-out_free_all:
-	printf("out_free_all fail\n");
-	free(port_info, M_CXGB);
-out_free_tom:
-	printf("out_free_tom fail\n");
-	free(t, M_CXGB);
-	return;
-}
-
-
-
-static int
-do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_act_open_rpl *rpl = cplhdr(m);
-	unsigned int atid = G_TID(ntohl(rpl->atid));
-	struct toe_tid_entry *toe_tid;
-
-	toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
-	if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
-		toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {
-		return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,
-			toe_tid->ctx);
-	} else {
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, CPL_ACT_OPEN_RPL);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-}
-
-static int
-do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	union opcode_tid *p = cplhdr(m);
-	unsigned int stid = G_TID(ntohl(p->opcode_tid));
-	struct toe_tid_entry *toe_tid;
-
-	toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
-	if (toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[p->opcode]) {
-		return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);
-	} else {
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, p->opcode);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-}
-
-static int
-do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
-{
-	union opcode_tid *p = cplhdr(m);
-	unsigned int hwtid;
-	struct toe_tid_entry *toe_tid;
-	
-	DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
-	hwtid = G_TID(ntohl(p->opcode_tid));
-
-	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
-	if (toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[p->opcode]) {
-		return toe_tid->client->handlers[p->opcode]
-						(dev, m, toe_tid->ctx);
-	} else {
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, p->opcode);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-}
-
-static int
-do_cr(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_pass_accept_req *req = cplhdr(m);
-	unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
-	struct toe_tid_entry *toe_tid;
-
-	toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
-	if (toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {
-		return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
-						(dev, m, toe_tid->ctx);
-	} else {
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, CPL_PASS_ACCEPT_REQ);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-}
-
-static int
-do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
-{
-	union opcode_tid *p = cplhdr(m);
-	unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
-	struct toe_tid_entry *toe_tid;
-
-	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
-	if (toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[p->opcode]) {
-		return toe_tid->client->handlers[p->opcode]
-						(dev, m, toe_tid->ctx);
-	} else {
-		struct cpl_abort_req_rss *req = cplhdr(m);
-		struct cpl_abort_rpl *rpl;
-		
-		struct mbuf *m = m_get(M_NOWAIT, MT_DATA);
-		if (!m) {
-			log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");
-			goto out;
-		}
-
-		m_set_priority(m, CPL_PRIORITY_DATA);
-		rpl = cplhdr(m);
-		rpl->wr.wr_hi = 
-			htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
-		rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
-		OPCODE_TID(rpl) =
-			htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
-		rpl->cmd = req->status;
-		cxgb_ofld_send(dev, m);
- out:
-		return (CPL_RET_BUF_DONE);
-	}
-}
-
-static int
-do_act_establish(struct t3cdev *dev, struct mbuf *m)
-{
-	struct cpl_act_establish *req;
-	unsigned int atid;
-	struct toe_tid_entry *toe_tid;
-
-	req = cplhdr(m);
-	atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
-	toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
-	if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
-		
-		return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
-						(dev, m, toe_tid->ctx);
-	} else {
-	
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, CPL_ACT_ESTABLISH);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-}
-
-
-static int
-do_term(struct t3cdev *dev, struct mbuf *m)
-{
-	unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
-	unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
-	struct toe_tid_entry *toe_tid;
-
-	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
-	if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
-		toe_tid->client->handlers[opcode]) {
-		return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);
-	} else {
-		log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
-			dev->name, opcode);
-		return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
-	}
-	return (0);
-}
-
-/*
- * Process a received packet with an unknown/unexpected CPL opcode.
- */
-static int
-do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
-	log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
-	    0xFF & *mtod(m, unsigned int *));
-	return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
-}
-
-/*
- * Add a new handler to the CPL dispatch table.  A NULL handler may be supplied
- * to unregister an existing handler.
- */
-void
-t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
-{
-	if (opcode < UCHAR_MAX)
-		tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
-	else
-		log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
-		       "opcode %u failed\n", opcode);
-}
-
-/*
- * Make a preliminary determination if a connection can be offloaded.  It's OK
- * to fail the offload later if we say we can offload here.  For now this
- * always accepts the offload request unless there are IP options.
- */
-static int
-can_offload(struct toedev *dev, struct socket *so)
-{
-	struct tom_data *tomd = TOM_DATA(dev);
-	struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
-	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
-
-	return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
-	    tomd->conf.activated &&
-	    (tomd->conf.max_conn < 0 ||
-	     atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
-}
-
-static int
-tom_ctl(struct toedev *dev, unsigned int req, void *data)
-{
-	struct tom_data *t = TOM_DATA(dev);
-	struct t3cdev *cdev = t->cdev;
-
-	if (cdev->ctl)
-		return cdev->ctl(cdev, req, data);
-
-	return (EOPNOTSUPP);
-}
-
-/*
- * Free an active-open TID.
- */
-void *
-cxgb_free_atid(struct t3cdev *tdev, int atid)
-{
-	struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
-	union active_open_entry *p = atid2entry(t, atid);
-	void *ctx = p->toe_tid.ctx;
-
-	mtx_lock(&t->atid_lock);
-	p->next = t->afree;
-	t->afree = p;
-	t->atids_in_use--;
-	mtx_unlock(&t->atid_lock);
-
-	return ctx;
-}
-
-/*
- * Free a server TID and return it to the free pool.
- */
-void
-cxgb_free_stid(struct t3cdev *tdev, int stid)
-{
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-	union listen_entry *p = stid2entry(t, stid);
-
-	mtx_lock(&t->stid_lock);
-	p->next = t->sfree;
-	t->sfree = p;
-	t->stids_in_use--;
-	mtx_unlock(&t->stid_lock);
-}
-
-/*
- * Free a server TID and return it to the free pool.
- */
-void *
-cxgb_get_lctx(struct t3cdev *tdev, int stid)
-{
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-	union listen_entry *p = stid2entry(t, stid);
-
-	return (p->toe_tid.ctx);
-}
-
-void
-cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
-	void *ctx, unsigned int tid)
-{
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
-	t->tid_tab[tid].client = client;
-	t->tid_tab[tid].ctx = ctx;
-	atomic_add_int(&t->tids_in_use, 1);
-}
-
-/* use ctx as a next pointer in the tid release list */
-void
-cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
-{
-	struct t3c_data *td = T3C_DATA (tdev);
-	struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];
-	
-	CTR0(KTR_TOM, "queuing tid release\n");
-	
-	mtx_lock(&td->tid_release_lock);
-	p->ctx = td->tid_release_list;
-	td->tid_release_list = p;
-
-	if (!p->ctx)
-		taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
-
-	mtx_unlock(&td->tid_release_lock);
-}
-
-/*
- * Remove a tid from the TID table.  A client may defer processing its last
- * CPL message if it is locked at the time it arrives, and while the message
- * sits in the client's backlog the TID may be reused for another connection.
- * To handle this we atomically switch the TID association if it still points
- * to the original client context.
- */
-void
-cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
-{
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
-	if (tid >= t->ntids)
-		panic("tid=%d >= t->ntids=%d", tid, t->ntids);
-	
-	if (tdev->type == T3A)
-		atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx);
-	else {
-		struct mbuf *m;
-
-		m = m_get(M_NOWAIT, MT_DATA);
-		if (__predict_true(m != NULL)) {
-			mk_tid_release(m, tid);
-			CTR1(KTR_CXGB, "releasing tid=%u", tid);
-			
-			cxgb_ofld_send(tdev, m);
-			t->tid_tab[tid].ctx = NULL;
-		} else
-			cxgb_queue_tid_release(tdev, tid);
-	}
-	atomic_add_int(&t->tids_in_use, -1);
-}
-
-int
-cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
-		     void *ctx)
-{
-	int atid = -1;
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
-	mtx_lock(&t->atid_lock);
-	if (t->afree) {
-		union active_open_entry *p = t->afree;
-
-		atid = (p - t->atid_tab) + t->atid_base;
-		t->afree = p->next;
-		p->toe_tid.ctx = ctx;
-		p->toe_tid.client = client;
-		t->atids_in_use++;
-	}
-	mtx_unlock(&t->atid_lock);
-	return atid;
-}
-
-int
-cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
-		     void *ctx)
-{
-	int stid = -1;
-	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
-	mtx_lock(&t->stid_lock);
-	if (t->sfree) {
-		union listen_entry *p = t->sfree;
-
-		stid = (p - t->stid_tab) + t->stid_base;
-		t->sfree = p->next;
-		p->toe_tid.ctx = ctx;
-		p->toe_tid.client = client;
-		t->stids_in_use++;
-	}
-	mtx_unlock(&t->stid_lock);
-	return stid;
-}
-
-
-static int
-is_offloading(struct ifnet *ifp)
-{
-	struct adapter *adapter;
-	int port;
-
-	rw_rlock(&adapter_list_lock);
-	TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
-		for_each_port(adapter, port) {
-			if (ifp == adapter->port[port].ifp) {
-				rw_runlock(&adapter_list_lock);
-				return 1;
-			}
-		}
-	}
-	rw_runlock(&adapter_list_lock);
-	return 0;
-}
-
-
-static void
-cxgb_arp_update_event(void *unused, struct rtentry *rt0,
-    uint8_t *enaddr, struct sockaddr *sa)
-{
-
-	if (!is_offloading(rt0->rt_ifp))
-		return;
-
-	RT_ADDREF(rt0);
-	RT_UNLOCK(rt0);
-	cxgb_neigh_update(rt0, enaddr, sa);
-	RT_LOCK(rt0);
-	RT_REMREF(rt0);
-}
-
-static void
-cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
-    struct rtentry *rt1, struct sockaddr *sa)
-{
-	/* 
-	 * ignore events on non-offloaded interfaces
-	 */
-	if (!is_offloading(rt0->rt_ifp))
-		return;
-
-	/*
-	 * Cannot redirect to non-offload device.
-	 */
-	if (!is_offloading(rt1->rt_ifp)) {
-		log(LOG_WARNING, "%s: Redirect to non-offload"
-		    "device ignored.\n", __FUNCTION__);
-		return;
-	}
-
-        /*
-	 * avoid LORs by dropping the route lock but keeping a reference
-	 * 
-	 */
-	RT_ADDREF(rt0);
-	RT_UNLOCK(rt0);
-	RT_ADDREF(rt1);
-	RT_UNLOCK(rt1);
-	
-	cxgb_redirect(rt0, rt1, sa);
-	cxgb_neigh_update(rt1, NULL, sa);
-
-	RT_LOCK(rt0);
-	RT_REMREF(rt0);
-	RT_LOCK(rt1);
-	RT_REMREF(rt1);
-}
-
-void
-cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
-{
-
-	if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) {
-		struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
-
-		PANIC_IF(!tdev);
-		t3_l2t_update(tdev, rt, enaddr, sa);
-	}
-}
-
-static void
-set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
-{
-	struct mbuf *m;
-	struct cpl_set_tcb_field *req;
-
-	m = m_gethdr(M_NOWAIT, MT_DATA);
-	if (!m) {
-		log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);
-		return;
-	}
-	
-	m_set_priority(m, CPL_PRIORITY_CONTROL);
-	req = mtod(m, struct cpl_set_tcb_field *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
-	
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
-	req->reply = 0;
-	req->cpu_idx = 0;
-	req->word = htons(W_TCB_L2T_IX);
-	req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
-	req->val = htobe64(V_TCB_L2T_IX(e->idx));
-	tdev->send(tdev, m);
-}
-
-void
-cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
-{
-	struct ifnet *olddev, *newdev;
-	struct tid_info *ti;
-	struct t3cdev *tdev;
-	u32 tid;
-	int update_tcb;
-	struct l2t_entry *e;
-	struct toe_tid_entry *te;
-
-	olddev = old->rt_ifp;
-	newdev = new->rt_ifp;
-	if (!is_offloading(olddev))
-		return;
-	if (!is_offloading(newdev)) {
-		log(LOG_WARNING, "%s: Redirect to non-offload"
-		    "device ignored.\n", __FUNCTION__);
-		return;
-	}
-	tdev = T3CDEV(olddev);
-	PANIC_IF(!tdev);
-	if (tdev != T3CDEV(newdev)) {
-		log(LOG_WARNING, "%s: Redirect to different "
-		    "offload device ignored.\n", __FUNCTION__);
-		return;
-	}
-
-	/* Add new L2T entry */
-	e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
-	if (!e) {
-		log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
-		       __FUNCTION__);
-		return;
-	}
-
-	/* Walk tid table and notify clients of dst change. */
-	ti = &(T3C_DATA (tdev))->tid_maps;
-	for (tid=0; tid < ti->ntids; tid++) {
-		te = lookup_tid(ti, tid);
-		PANIC_IF(!te);
-		if (te->ctx && te->client && te->client->redirect) {
-			update_tcb = te->client->redirect(te->ctx, old, new,
-							  e);
-			if (update_tcb)  {
-				l2t_hold(L2DATA(tdev), e);
-				set_l2t_ix(tdev, tid, e);
-			}
-		}
-	}
-	l2t_release(L2DATA(tdev), e);
-}
-
-/*
- * Initialize the CPL dispatch table.
- */
-static void
-init_cpl_handlers(void)
-{
-	int i;
-
-	for (i = 0; i < 256; ++i)
-		tom_cpl_handlers[i] = do_bad_cpl;
-
-	t3_init_listen_cpl_handlers();
-}
-
-static int
-t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
-{
-	struct tom_data *t = TOM_DATA(dev);
-	struct t3cdev *cdev = t->cdev;
-	struct ddp_params ddp;
-	struct ofld_page_info rx_page_info;
-	int err;
-	
-	t3_init_tunables(t);
-	mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
-	CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
-
-	dev->tod_can_offload = can_offload;
-	dev->tod_connect = t3_connect;
-	dev->tod_ctl = tom_ctl;
-#if 0	
-	dev->tod_failover = t3_failover;
-#endif
-	err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
-	if (err)
-		return err;
-
-	err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
-	if (err)
-		return err;
-
-	t->ddp_llimit = ddp.llimit;
-	t->ddp_ulimit = ddp.ulimit;
-	t->pdev = ddp.pdev;
-	t->rx_page_size = rx_page_info.page_size;
-	/* OK if this fails, we just can't do DDP */
-	t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
-	t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);
-
-	mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
-
-
-	t3_sysctl_register(cdev->adapter, &t->conf);
 	return (0);
 }
 
 static void
-cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
+free_tid_tabs(struct tid_info *t)
 {
-	struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
-	struct tom_data *p;
-	
-	mtx_lock(&cxgb_list_lock);
-	TAILQ_FOREACH(p, &cxgb_list, entry) {
-			t3_listen_start(&p->tdev, so, p->cdev);
+	if (mtx_initialized(&t->stid_lock))
+		mtx_destroy(&t->stid_lock);
+	if (mtx_initialized(&t->atid_lock))
+		mtx_destroy(&t->atid_lock);
+	free(t->tid_tab, M_CXGB);
+}
+
+static int
+write_smt_entry(struct adapter *sc, int idx)
+{
+	struct port_info *pi = &sc->port[idx];
+	struct cpl_smt_write_req *req;
+	struct mbuf *m;
+
+	m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, req);
+	if (m == NULL) {
+		log(LOG_ERR, "%s: no mbuf, can't write SMT entry for %d\n",
+		    __func__, idx);
+		return (ENOMEM);
 	}
-	mtx_unlock(&cxgb_list_lock);
+
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
+	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
+	req->iff = idx;
+	memset(req->src_mac1, 0, sizeof(req->src_mac1));
+	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
+
+	t3_offload_tx(sc, m);
+
+	return (0);
 }
 
 static void
-cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
+free_tom_data(struct tom_data *td)
 {
-	struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
-	struct tom_data *p;
-	
-	mtx_lock(&cxgb_list_lock);
-	TAILQ_FOREACH(p, &cxgb_list, entry) {
-		if (tp->t_state == TCPS_LISTEN)
-			t3_listen_stop(&p->tdev, so, p->cdev);
+	KASSERT(TAILQ_EMPTY(&td->toep_list),
+	    ("%s: toep_list not empty", __func__));
+
+	if (td->listen_mask != 0)
+		hashdestroy(td->listen_hash, M_CXGB, td->listen_mask);
+
+	if (mtx_initialized(&td->toep_list_lock))
+		mtx_destroy(&td->toep_list_lock);
+	if (mtx_initialized(&td->lctx_hash_lock))
+		mtx_destroy(&td->lctx_hash_lock);
+	if (mtx_initialized(&td->tid_release_lock))
+		mtx_destroy(&td->tid_release_lock);
+	if (td->l2t)
+		t3_free_l2t(td->l2t);
+	free_tid_tabs(&td->tid_maps);
+	free(td, M_CXGB);
+}
+
+/*
+ * Ground control to Major TOM
+ * Commencing countdown, engines on
+ */
+static int
+t3_tom_activate(struct adapter *sc)
+{
+	struct tom_data *td;
+	struct toedev *tod;
+	int i, rc = 0;
+	struct mc5_params *mc5 = &sc->params.mc5;
+	u_int ntids, natids, mtus;
+
+	ADAPTER_LOCK_ASSERT_OWNED(sc);	/* for sc->flags */
+
+	/* per-adapter softc for TOM */
+	td = malloc(sizeof(*td), M_CXGB, M_ZERO | M_NOWAIT);
+	if (td == NULL)
+		return (ENOMEM);
+
+	/* List of TOE PCBs and associated lock */
+	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
+	TAILQ_INIT(&td->toep_list);
+
+	/* Listen context */
+	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
+	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGB,
+	    &td->listen_mask, HASH_NOWAIT);
+
+	/* TID release task */
+	TASK_INIT(&td->tid_release_task, 0 , t3_process_tid_release_list, td);
+	mtx_init(&td->tid_release_lock, "tid release", NULL, MTX_DEF);
+
+	/* L2 table */
+	td->l2t = t3_init_l2t(L2T_SIZE);
+	if (td->l2t == NULL) {
+		rc = ENOMEM;
+		goto done;
 	}
-	mtx_unlock(&cxgb_list_lock);
+
+	/* TID tables */
+	ntids = t3_mc5_size(&sc->mc5) - mc5->nroutes - mc5->nfilters -
+	    mc5->nservers;
+	natids = min(ntids / 2, 64 * 1024);
+	rc = alloc_tid_tabs(&td->tid_maps, ntids, natids, mc5->nservers,
+	    0x100000 /* ATID_BASE */, ntids);
+	if (rc != 0)
+		goto done;
+
+	/* CPL handlers */
+	t3_init_listen_cpl_handlers(sc);
+	t3_init_l2t_cpl_handlers(sc);
+	t3_init_cpl_io(sc);
+
+	/* toedev ops */
+	tod = &td->tod;
+	init_toedev(tod);
+	tod->tod_softc = sc;
+	tod->tod_connect = t3_connect;
+	tod->tod_listen_start = t3_listen_start;
+	tod->tod_listen_stop = t3_listen_stop;
+	tod->tod_rcvd = t3_rcvd;
+	tod->tod_output = t3_tod_output;
+	tod->tod_send_rst = t3_send_rst;
+	tod->tod_send_fin = t3_send_fin;
+	tod->tod_pcb_detach = t3_pcb_detach;
+	tod->tod_l2_update = t3_l2_update;
+	tod->tod_syncache_added = t3_syncache_added;
+	tod->tod_syncache_removed = t3_syncache_removed;
+	tod->tod_syncache_respond = t3_syncache_respond;
+	tod->tod_offload_socket = t3_offload_socket;
+
+	/* port MTUs */
+	mtus = sc->port[0].ifp->if_mtu;
+	if (sc->params.nports > 1)
+		mtus |= sc->port[1].ifp->if_mtu << 16;
+	t3_write_reg(sc, A_TP_MTU_PORT_TABLE, mtus);
+	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
+	    sc->params.rev == 0 ? sc->port[0].ifp->if_mtu : 0xffff);
+
+	/* SMT entry for each port */
+	for_each_port(sc, i) {
+		write_smt_entry(sc, i);
+		TOEDEV(sc->port[i].ifp) = &td->tod;
+	}
+
+	/* Switch TP to offload mode */
+	t3_tp_set_offload_mode(sc, 1);
+
+	sc->tom_softc = td;
+	sc->flags |= TOM_INIT_DONE;
+	register_toedev(tod);
+
+done:
+	if (rc != 0)
+		free_tom_data(td);
+
+	return (rc);
+}
+
+static int
+t3_tom_deactivate(struct adapter *sc)
+{
+	int rc = 0;
+	struct tom_data *td = sc->tom_softc;
+
+	ADAPTER_LOCK_ASSERT_OWNED(sc);	/* for sc->flags */
+
+	if (td == NULL)
+		return (0);	/* XXX. KASSERT? */
+
+	if (sc->offload_map != 0)
+		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */
+
+	mtx_lock(&td->toep_list_lock);
+	if (!TAILQ_EMPTY(&td->toep_list))
+		rc = EBUSY;
+	mtx_unlock(&td->toep_list_lock);
+
+	mtx_lock(&td->lctx_hash_lock);
+	if (td->lctx_count > 0)
+		rc = EBUSY;
+	mtx_unlock(&td->lctx_hash_lock);
+
+	if (rc == 0) {
+		unregister_toedev(&td->tod);
+		t3_tp_set_offload_mode(sc, 0);
+		free_tom_data(td);
+		sc->tom_softc = NULL;
+		sc->flags &= ~TOM_INIT_DONE;
+	}
+
+	return (rc);
+}
+
+static int
+t3_tom_mod_load(void)
+{
+	int rc;
+
+	rc = t3_register_uld(&tom_uld_info);
+	if (rc != 0)
+		t3_tom_mod_unload();
+
+	return (rc);
 }
 
 static void
-cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
+tom_uninit(struct adapter *sc, void *arg __unused)
 {
-	struct tcpcb *tp = intotcpcb(inp);
-
-	if (tp->t_state == TCPS_LISTEN)
-		cxgb_toe_listen_start(NULL, tp);
-}
-
-static void
-cxgb_register_listeners(void)
-{
-
-	inp_apply_all(cxgb_toe_listen_start_handler, NULL);
+	/* Try to free resources (works only if no port has IFCAP_TOE) */
+	ADAPTER_LOCK(sc);
+	if (sc->flags & TOM_INIT_DONE)
+		t3_deactivate_uld(sc, ULD_TOM);
+	ADAPTER_UNLOCK(sc);
 }
 
 static int
-t3_tom_init(void)
+t3_tom_mod_unload(void)
 {
-	init_cpl_handlers();
-	if (t3_init_cpl_io() < 0) {
-		log(LOG_ERR,
-		    "Unable to initialize cpl io ops\n");
-		return -1;
-	}
-	t3_init_socket_ops();
+	t3_iterate(tom_uninit, NULL);
 
-	 /* Register with the TOE device layer. */
+	if (t3_unregister_uld(&tom_uld_info) == EBUSY)
+		return (EBUSY);
 
-	if (register_tom(&t3_tom_info) != 0) {
-		log(LOG_ERR,
-		    "Unable to register Chelsio T3 TCP offload module.\n");
-		return -1;
-	}
-
-	rw_init(&adapter_list_lock, "ofld adap list");
-	TAILQ_INIT(&adapter_list);
-	EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
-	    NULL, EVENTHANDLER_PRI_ANY);
-	EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
-	    NULL, EVENTHANDLER_PRI_ANY);
-	
-	mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
-	listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
-	    cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
-	listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
-	    cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
-	TAILQ_INIT(&cxgb_list);
-	
-
-
-	t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
-	t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
-	t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
-	t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
-	t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
-	t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
-	t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
-	t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
-	t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);
-
-	/* Register to offloading devices */
-	cxgb_register_client(&t3c_tom_client);
-	
 	return (0);
 }
+#endif	/* ifdef TCP_OFFLOAD */
 
 static int
-t3_tom_load(module_t mod, int cmd, void *arg)
+t3_tom_modevent(module_t mod, int cmd, void *arg)
 {
-	int err = 0;
+	int rc = 0;
 
+#ifdef TCP_OFFLOAD
 	switch (cmd) {
 	case MOD_LOAD:
-		t3_tom_init();
+		rc = t3_tom_mod_load();
 		break;
-	case MOD_QUIESCE:
+
+	case MOD_UNLOAD:
+		rc = t3_tom_mod_unload();
 		break;
-	case MOD_UNLOAD:
-		printf("uhm, ... unloading isn't really supported for toe\n");
-		break;
-	case MOD_SHUTDOWN:
-		break;
+
 	default:
-		err = EOPNOTSUPP;
-		break;
+		rc = EINVAL;
 	}
-
-	return (err);
+#else
+	rc = EOPNOTSUPP;
+#endif
+	return (rc);
 }
 
-static moduledata_t mod_data= {
+static moduledata_t t3_tom_moddata= {
 	"t3_tom",
-	t3_tom_load,
+	t3_tom_modevent,
 	0
 };
+
 MODULE_VERSION(t3_tom, 1);
 MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
-MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
-DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
-
+MODULE_DEPEND(t3_tom, cxgbc, 1, 1, 1);
+DECLARE_MODULE(t3_tom, t3_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tom.h
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tom.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgb/ulp/tom/cxgb_tom.h	Wed Jul 25 17:04:43 2012 +0300
@@ -1,7 +1,6 @@
-
 /**************************************************************************
 
-Copyright (c) 2007, Chelsio Inc.
+Copyright (c) 2007, 2009 Chelsio Inc.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -27,134 +26,254 @@
 POSSIBILITY OF SUCH DAMAGE.
 
 
-$FreeBSD$
+$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_tom.h 237263 2012-06-19 07:34:13Z np $
 
 ***************************************************************************/
 #ifndef CXGB_TOM_H_
 #define CXGB_TOM_H_
 #include <sys/protosw.h>
-#include <netinet/toedev.h>
+#include <netinet/toecore.h>
 
-#define LISTEN_INFO_HASH_SIZE 32 
+MALLOC_DECLARE(M_CXGB);
 
-struct listen_info {
-	struct listen_info *next;  /* Link to next entry */
-	struct socket *so;         /* The listening socket */
-	unsigned int stid;         /* The server TID */
-};
+#define	KTR_CXGB	KTR_SPARE3
 
+#define LISTEN_HASH_SIZE 32 
 
 /*
- * TOM tunable parameters.  They can be manipulated through sysctl(2) or /proc.
+ * Holds the size, base address, free list start, etc of the TID, server TID,
+ * and active-open TID tables for a offload device.
+ * The tables themselves are allocated dynamically.
  */
-struct tom_tunables {
-        int max_host_sndbuf;    // max host RAM consumed by a sndbuf
-        int tx_hold_thres;      // push/pull threshold for non-full TX sk_buffs
-        int max_wrs;            // max # of outstanding WRs per connection
-        int rx_credit_thres;    // min # of RX credits needed for RX_DATA_ACK
-        int cong_alg;           // Congestion control algorithm
-        int mss;                // max TX_DATA WR payload size
-        int delack;             // delayed ACK control
-        int max_conn;           // maximum number of offloaded connections
-        int soft_backlog_limit; // whether the listen backlog limit is soft
-        int ddp;                // whether to put new connections in DDP mode
-        int ddp_thres;          // min recvmsg size before activating DDP
-        int ddp_copy_limit;     // capacity of kernel DDP buffer
-        int ddp_push_wait;      // whether blocking DDP waits for PSH flag
-        int ddp_rcvcoalesce;    // whether receive coalescing is enabled
-        int zcopy_sosend_enabled; // < is never zcopied
-        int zcopy_sosend_partial_thres; // < is never zcopied
-        int zcopy_sosend_partial_copy; // bytes copied in partial zcopy
-        int zcopy_sosend_thres;// >= are mostly zcopied
-        int zcopy_sosend_copy; // bytes coped in zcopied
-        int zcopy_sosend_ret_pending_dma;// pot. return while pending DMA
-        int activated;          // TOE engine activation state
+struct tid_info {
+	void **tid_tab;
+	unsigned int ntids;
+	volatile unsigned int tids_in_use;
+
+	union listen_entry *stid_tab;
+	unsigned int nstids;
+	unsigned int stid_base;
+
+	union active_open_entry *atid_tab;
+	unsigned int natids;
+	unsigned int atid_base;
+
+	/*
+	 * The following members are accessed R/W so we put them in their own
+	 * cache lines.  TOM_XXX: actually do what is said here.
+	 *
+	 * XXX We could combine the atid fields above with the lock here since
+	 * atids are use once (unlike other tids).  OTOH the above fields are
+	 * usually in cache due to tid_tab.
+	 */
+	struct mtx atid_lock;
+	union active_open_entry *afree;
+	unsigned int atids_in_use;
+
+	struct mtx stid_lock;
+	union listen_entry *sfree;
+	unsigned int stids_in_use;
 };
 
 struct tom_data {
-        TAILQ_ENTRY(tom_data) entry;
-			      
-        struct t3cdev *cdev;
-        struct pci_dev *pdev;
-        struct toedev tdev;
+        struct toedev tod;
 
-        struct cxgb_client *client;
-        struct tom_tunables conf;
-        struct tom_sysctl_table *sysctl;
+	/*
+	 * toepcb's associated with this TOE device are either on the
+	 * toep list or in the synq of a listening socket in lctx hash.
+	 */
+	struct mtx toep_list_lock;
+	TAILQ_HEAD(, toepcb) toep_list;
+
+	struct l2t_data *l2t;
+	struct tid_info tid_maps;
 
         /*
-         * The next three locks listen_lock, deferq.lock, and tid_release_lock
-         * are used rarely so we let them potentially share a cacheline.
+	 * The next two locks listen_lock, and tid_release_lock are used rarely
+	 * so we let them potentially share a cacheline.
          */
 
-        struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
-        struct mtx listen_lock;
+	LIST_HEAD(, listen_ctx) *listen_hash;
+	u_long listen_mask;
+	int lctx_count;		/* # of lctx in the hash table */
+        struct mtx lctx_hash_lock;
 
-        struct mbuf_head deferq;
-        struct task deferq_task;
-
-        struct socket **tid_release_list;
+        void **tid_release_list;
         struct mtx tid_release_lock;
         struct task tid_release_task;
-
-        volatile int tx_dma_pending;
-	
-        unsigned int ddp_llimit;
-        unsigned int ddp_ulimit;
-
-        unsigned int rx_page_size;
-
-        u8 *ppod_map;
-        unsigned int nppods;
-        struct mtx ppod_map_lock;
-	
-        struct adap_ports *ports;
-	struct taskqueue *tq;
 };
 
+struct synq_entry {
+	TAILQ_ENTRY(synq_entry) link;	/* listen_ctx's synq link */
+	int flags;			/* same as toepcb's tp_flags */
+	int tid;
+	struct mbuf *m;			/* backpointer to containing mbuf */
+	struct listen_ctx *lctx;	/* backpointer to listen ctx */
+	struct cpl_pass_establish *cpl;
+	struct toepcb *toep;
+	struct l2t_entry *e;
+	uint32_t iss;
+	uint32_t ts;
+	uint32_t opt0h;
+	uint32_t qset;
+	int rx_credits;
+	volatile u_int refcnt;
+
+#define RPL_OK		0	/* ok to reply */
+#define RPL_DONE	1	/* replied already */
+#define RPL_DONT	2	/* don't reply */
+	volatile u_int reply;	/* see above. */
+};
+
+#define LCTX_RPL_PENDING	1	/* waiting for CPL_PASS_OPEN_RPL */
 
 struct listen_ctx {
-	struct socket *lso;
-	struct tom_data *tom_data;
-	int ulp_mode;
-	LIST_HEAD(, toepcb) synq_head;
-	
+	LIST_ENTRY(listen_ctx) link;	/* listen hash linkage */
+	volatile int refcnt;
+	int stid;
+	int flags;
+	struct inpcb *inp;		/* listening socket's inp */
+	int qset;
+	TAILQ_HEAD(, synq_entry) synq;
 };
 
-#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->tod_l4opt)
-#define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev)
-#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
-#define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
+void t3_process_tid_release_list(void *data, int pending);
 
-#define TP_DATASENT         	(1 << 0)
-#define TP_TX_WAIT_IDLE      	(1 << 1)
-#define TP_FIN_SENT          	(1 << 2)
-#define TP_ABORT_RPL_PENDING 	(1 << 3)
-#define TP_ABORT_SHUTDOWN    	(1 << 4)
-#define TP_ABORT_RPL_RCVD    	(1 << 5)
-#define TP_ABORT_REQ_RCVD    	(1 << 6)
-#define TP_CLOSE_CON_REQUESTED	(1 << 7)
-#define TP_SYN_RCVD		(1 << 8)
-#define TP_ESTABLISHED		(1 << 9)
-
-void t3_init_tunables(struct tom_data *t);
-
-void t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p);
-
-static __inline struct mbuf *
-m_gethdr_nofail(int len)
+static inline struct tom_data *
+t3_tomdata(struct toedev *tod)
 {
-	struct mbuf *m;
-	
-	m = m_gethdr(M_NOWAIT, MT_DATA);
-	if (m == NULL) {
-		panic("implement lowmem cache\n");
-	}
-	
-	KASSERT(len < MHLEN, ("requested header size too large for mbuf"));	
-	m->m_pkthdr.len = m->m_len = len;
-	return (m);
+	return (member2struct(tom_data, tod, tod));
 }
 
+union listen_entry {
+	void *ctx;
+	union listen_entry *next;
+};
 
+union active_open_entry {
+	void *ctx;
+	union active_open_entry *next;
+};
+
+/*
+ * Map an ATID or STID to their entries in the corresponding TID tables.
+ */
+static inline union active_open_entry *atid2entry(const struct tid_info *t,
+                                                  unsigned int atid)
+{
+        return &t->atid_tab[atid - t->atid_base];
+}
+
+
+static inline union listen_entry *stid2entry(const struct tid_info *t,
+                                             unsigned int stid)
+{
+        return &t->stid_tab[stid - t->stid_base];
+}
+
+/*
+ * Find the connection corresponding to a TID.
+ */
+static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
+{
+	void *p;
+
+	if (tid >= t->ntids)
+		return (NULL);
+
+	p = t->tid_tab[tid];
+	if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+		return (p);
+
+	return (NULL);
+}
+
+/*
+ * Find the connection corresponding to a server TID.
+ */
+static inline void *lookup_stid(const struct tid_info *t, unsigned int tid)
+{
+	void *p;
+
+        if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
+                return (NULL);
+
+	p = stid2entry(t, tid)->ctx;
+	if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+		return (p);
+
+	return (NULL);
+}
+
+/*
+ * Find the connection corresponding to an active-open TID.
+ */
+static inline void *lookup_atid(const struct tid_info *t, unsigned int tid)
+{
+	void *p;
+
+        if (tid < t->atid_base || tid >= t->atid_base + t->natids)
+                return (NULL);
+
+	p = atid2entry(t, tid)->ctx;
+	if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+		return (p);
+
+	return (NULL);
+}
+
+static inline uint32_t
+calc_opt2(int cpu_idx)
+{
+	uint32_t opt2 = F_CPU_INDEX_VALID | V_CPU_INDEX(cpu_idx);
+
+	/* 3 = highspeed CC algorithm */
+	opt2 |= V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(3) |
+	    V_PACING_FLAVOR(1);
+
+	/* coalesce and push bit semantics */
+	opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(3);
+
+	return (htobe32(opt2));
+}
+
+/* cxgb_tom.c */
+struct toepcb *toepcb_alloc(struct toedev *);
+void toepcb_free(struct toepcb *);
+
+/* cxgb_cpl_io.c */
+void t3_init_cpl_io(struct adapter *);
+int t3_push_frames(struct socket *, int);
+int t3_connect(struct toedev *, struct socket *, struct rtentry *,
+    struct sockaddr *);
+int t3_tod_output(struct toedev *, struct tcpcb *);
+int t3_send_rst(struct toedev *, struct tcpcb *);
+int t3_send_fin(struct toedev *, struct tcpcb *);
+void insert_tid(struct tom_data *, void *, unsigned int);
+void update_tid(struct tom_data *, void *, unsigned int);
+void remove_tid(struct tom_data *, unsigned int);
+uint32_t calc_opt0h(struct socket *, int, int, struct l2t_entry *);
+uint32_t calc_opt0l(struct socket *, int);
+void queue_tid_release(struct toedev *, unsigned int);
+void offload_socket(struct socket *, struct toepcb *);
+void undo_offload_socket(struct socket *);
+int select_rcv_wscale(void);
+unsigned long select_rcv_wnd(struct socket *);
+int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int);
+void make_established(struct socket *, uint32_t, uint32_t, uint16_t);
+void t3_rcvd(struct toedev *, struct tcpcb *);
+void t3_pcb_detach(struct toedev *, struct tcpcb *);
+void send_abort_rpl(struct toedev *, int, int);
+void release_tid(struct toedev *, unsigned int, int);
+
+/* cxgb_listen.c */
+void t3_init_listen_cpl_handlers(struct adapter *);
+int t3_listen_start(struct toedev *, struct tcpcb *);
+int t3_listen_stop(struct toedev *, struct tcpcb *);
+void t3_syncache_added(struct toedev *, void *);
+void t3_syncache_removed(struct toedev *, void *);
+int t3_syncache_respond(struct toedev *, void *, struct mbuf *);
+int do_abort_req_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+int do_abort_rpl_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+void t3_offload_socket(struct toedev *, void *, struct socket *);
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c
--- a/head/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c	Wed Jul 25 16:55:08 2012 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,140 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c 227309 2011-11-07 15:43:11Z ed $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/fcntl.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
-
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-
-#include <sys/syslog.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <common/cxgb_t3_cpl.h>
-#include <cxgb_offload.h>
-#include <cxgb_include.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-
-/* Avoid clutter in the hw.* space, keep all toe tunables within hw.cxgb */
-SYSCTL_DECL(_hw_cxgb);
-static SYSCTL_NODE(_hw_cxgb, OID_AUTO, toe, CTLFLAG_RD, 0, "TOE parameters");
-
-static struct tom_tunables default_tunable_vals = {
-	.max_host_sndbuf = 32 * 1024,
-	.tx_hold_thres = 0,
-	.max_wrs = 15,
-	.rx_credit_thres = 15 * 1024,
-	.cong_alg = -1,
-	.mss = 16384,
-	.delack = 1,
-	.max_conn = -1,
-	.soft_backlog_limit = 0,
-	.ddp = 1,
-	.ddp_thres = 14 * 4096,
-	.ddp_copy_limit = 13 * 4096,
-	.ddp_push_wait = 1,
-	.ddp_rcvcoalesce = 0,
-	.zcopy_sosend_enabled = 0,	
-	.zcopy_sosend_partial_thres = 40960,
-	.zcopy_sosend_partial_copy = 4096 * 3,
-	.zcopy_sosend_thres = 128 * 1024,
-	.zcopy_sosend_copy = 4096 * 2,
-	.zcopy_sosend_ret_pending_dma = 1,
-	.activated = 1,
-};
-
-static int activated = 1;
-TUNABLE_INT("hw.cxgb.toe.activated", &activated);
-SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
-    "enable TOE at init time");
-
-static int ddp = 1;
-TUNABLE_INT("hw.cxgb.toe.ddp", &ddp);
-SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, ddp, CTLFLAG_RDTUN, &ddp, 0, "enable DDP");
-
-void
-t3_init_tunables(struct tom_data *t)
-{
-	t->conf = default_tunable_vals;
-
-	/* Adjust tunables */
-	t->conf.activated = activated;
-	t->conf.ddp = ddp;
-
-	/* Now apply device specific fixups. */
-	t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
-	t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
-}
-
-void
-t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p)
-{
-	struct sysctl_ctx_list *ctx;
-	struct sysctl_oid_list *children;
-
-	ctx = device_get_sysctl_ctx(sc->dev);
-	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
-	
-}
-
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/adapter.h
--- a/head/sys/dev/cxgbe/adapter.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/adapter.h	Wed Jul 25 17:04:43 2012 +0300
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/adapter.h 231115 2012-02-07 06:21:59Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/adapter.h 237819 2012-06-29 19:51:06Z np $
  *
  */
 
@@ -44,6 +44,7 @@
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_media.h>
+#include <netinet/in.h>
 #include <netinet/tcp_lro.h>
 
 #include "offload.h"
@@ -156,6 +157,7 @@
 	INTR_DIRECT	= (1 << 2),	/* direct interrupts for everything */
 	MASTER_PF	= (1 << 3),
 	ADAP_SYSCTL_CTX	= (1 << 4),
+	TOM_INIT_DONE	= (1 << 5),
 
 	CXGBE_BUSY	= (1 << 9),
 
@@ -198,7 +200,7 @@
 	int first_txq;	/* index of first tx queue */
 	int nrxq;	/* # of rx queues */
 	int first_rxq;	/* index of first rx queue */
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	int nofldtxq;		/* # of offload tx queues */
 	int first_ofld_txq;	/* index of first offload tx queue */
 	int nofldrxq;		/* # of offload rx queues */
@@ -212,6 +214,8 @@
 	struct link_config link_cfg;
 	struct port_stats stats;
 
+	eventhandler_tag vlan_c;
+
 	struct callout tick;
 	struct sysctl_ctx_list ctx;	/* from ifconfig up to driver detach */
 
@@ -295,7 +299,7 @@
 enum {
 	EQ_CTRL		= 1,
 	EQ_ETH		= 2,
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	EQ_OFLD		= 3,
 #endif
 
@@ -388,7 +392,7 @@
 	/* stats for common events first */
 
 	uint64_t txcsum;	/* # of times hardware assisted with checksum */
-	uint64_t tso_wrs;	/* # of IPv4 TSO work requests */
+	uint64_t tso_wrs;	/* # of TSO work requests */
 	uint64_t vlan_insertion;/* # of times VLAN tag was inserted */
 	uint64_t imm_wrs;	/* # of work requests with immediate data */
 	uint64_t sgl_wrs;	/* # of work requests with direct SGL */
@@ -408,7 +412,7 @@
 	struct sge_fl fl;	/* MUST follow iq */
 
 	struct ifnet *ifp;	/* the interface this rxq belongs to */
-#ifdef INET
+#if defined(INET) || defined(INET6)
 	struct lro_ctrl lro;	/* LRO state */
 #endif
 
@@ -421,14 +425,36 @@
 
 } __aligned(CACHE_LINE_SIZE);
 
-#ifndef TCP_OFFLOAD_DISABLE
+static inline struct sge_rxq *
+iq_to_rxq(struct sge_iq *iq)
+{
+
+	return (member2struct(sge_rxq, iq, iq));
+}
+
+
+#ifdef TCP_OFFLOAD
 /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */
 struct sge_ofld_rxq {
 	struct sge_iq iq;	/* MUST be first */
 	struct sge_fl fl;	/* MUST follow iq */
 } __aligned(CACHE_LINE_SIZE);
+
+static inline struct sge_ofld_rxq *
+iq_to_ofld_rxq(struct sge_iq *iq)
+{
+
+	return (member2struct(sge_ofld_rxq, iq, iq));
+}
 #endif
 
+struct wrqe {
+	STAILQ_ENTRY(wrqe) link;
+	struct sge_wrq *wrq;
+	int wr_len;
+	uint64_t wr[] __aligned(16);
+};
+
 /*
  * wrq: SGE egress queue that is given prebuilt work requests.  Both the control
  * and offload tx queues are of this type.
@@ -437,8 +463,9 @@
 	struct sge_eq eq;	/* MUST be first */
 
 	struct adapter *adapter;
-	struct mbuf *head;	/* held up due to lack of descriptors */
-	struct mbuf *tail;	/* valid only if head is valid */
+
+	/* List of WRs held up due to lack of tx descriptors */
+	STAILQ_HEAD(, wrqe) wr_list;
 
 	/* stats for common events first */
 
@@ -456,7 +483,7 @@
 
 	int nrxq;	/* total # of Ethernet rx queues */
 	int ntxq;	/* total # of Ethernet tx tx queues */
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	int nofldrxq;	/* total # of TOE rx queues */
 	int nofldtxq;	/* total # of TOE tx queues */
 #endif
@@ -468,7 +495,7 @@
 	struct sge_wrq *ctrlq;	/* Control queues */
 	struct sge_txq *txq;	/* NIC tx queues */
 	struct sge_rxq *rxq;	/* NIC rx queues */
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	struct sge_wrq *ofld_txq;	/* TOE tx queues */
 	struct sge_ofld_rxq *ofld_rxq;	/* TOE rx queues */
 #endif
@@ -482,6 +509,7 @@
 struct rss_header;
 typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
+typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *);
 
 struct adapter {
 	SLIST_ENTRY(adapter) link;
@@ -518,15 +546,15 @@
 	uint8_t chan_map[NCHAN];
 	uint32_t filter_mode;
 
-#ifndef TCP_OFFLOAD_DISABLE
-	struct uld_softc tom;
+#ifdef TCP_OFFLOAD
+	void *tom_softc;	/* (struct tom_data *) */
 	struct tom_tunables tt;
 #endif
 	struct l2t_data *l2t;	/* L2 table */
 	struct tid_info tids;
 
 	int open_device_map;
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	int offload_map;
 #endif
 	int flags;
@@ -553,7 +581,8 @@
 	TAILQ_HEAD(, sge_fl) sfl;
 	struct callout sfl_callout;
 
-	cpl_handler_t cpl_handler[256] __aligned(CACHE_LINE_SIZE);
+	an_handler_t an_handler __aligned(CACHE_LINE_SIZE);
+	cpl_handler_t cpl_handler[256];
 };
 
 #define ADAPTER_LOCK(sc)		mtx_lock(&(sc)->sc_lock)
@@ -608,82 +637,96 @@
 static inline uint32_t
 t4_read_reg(struct adapter *sc, uint32_t reg)
 {
+
 	return bus_space_read_4(sc->bt, sc->bh, reg);
 }
 
 static inline void
 t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val)
 {
+
 	bus_space_write_4(sc->bt, sc->bh, reg, val);
 }
 
 static inline uint64_t
 t4_read_reg64(struct adapter *sc, uint32_t reg)
 {
+
 	return t4_bus_space_read_8(sc->bt, sc->bh, reg);
 }
 
 static inline void
 t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val)
 {
+
 	t4_bus_space_write_8(sc->bt, sc->bh, reg, val);
 }
 
 static inline void
 t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val)
 {
+
 	*val = pci_read_config(sc->dev, reg, 1);
 }
 
 static inline void
 t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val)
 {
+
 	pci_write_config(sc->dev, reg, val, 1);
 }
 
 static inline void
 t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val)
 {
+
 	*val = pci_read_config(sc->dev, reg, 2);
 }
 
 static inline void
 t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val)
 {
+
 	pci_write_config(sc->dev, reg, val, 2);
 }
 
 static inline void
 t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val)
 {
+
 	*val = pci_read_config(sc->dev, reg, 4);
 }
 
 static inline void
 t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val)
 {
+
 	pci_write_config(sc->dev, reg, val, 4);
 }
 
 static inline struct port_info *
 adap2pinfo(struct adapter *sc, int idx)
 {
+
 	return (sc->port[idx]);
 }
 
 static inline void
 t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[])
 {
+
 	bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN);
 }
 
 static inline bool is_10G_port(const struct port_info *pi)
 {
+
 	return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0);
 }
 
 static inline int tx_resume_threshold(struct sge_eq *eq)
 {
+
 	return (eq->qsize / 4);
 }
 
@@ -697,6 +740,7 @@
 void t4_os_link_changed(struct adapter *, int, int);
 void t4_iterate(void (*)(struct adapter *, void *), void *);
 int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
+int t4_register_an_handler(struct adapter *, an_handler_t);
 
 /* t4_sge.c */
 void t4_sge_modload(void);
@@ -713,21 +757,45 @@
 void t4_intr(void *);
 void t4_intr_err(void *);
 void t4_intr_evt(void *);
-int t4_mgmt_tx(struct adapter *, struct mbuf *);
-int t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct mbuf *);
+void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
 int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
 void t4_update_fl_bufsize(struct ifnet *);
 int can_resume_tx(struct sge_eq *);
 
-static inline int t4_wrq_tx(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m)
+static inline struct wrqe *
+alloc_wrqe(int wr_len, struct sge_wrq *wrq)
 {
-	int rc;
+	int len = offsetof(struct wrqe, wr) + wr_len;
+	struct wrqe *wr;
+
+	wr = malloc(len, M_CXGBE, M_NOWAIT);
+	if (__predict_false(wr == NULL))
+		return (NULL);
+	wr->wr_len = wr_len;
+	wr->wrq = wrq;
+	return (wr);
+}
+
+static inline void *
+wrtod(struct wrqe *wr)
+{
+	return (&wr->wr[0]);
+}
+
+static inline void
+free_wrqe(struct wrqe *wr)
+{
+	free(wr, M_CXGBE);
+}
+
+static inline void
+t4_wrq_tx(struct adapter *sc, struct wrqe *wr)
+{
+	struct sge_wrq *wrq = wr->wrq;
 
 	TXQ_LOCK(wrq);
-	rc = t4_wrq_tx_locked(sc, wrq, m);
+	t4_wrq_tx_locked(sc, wrq, wr);
 	TXQ_UNLOCK(wrq);
-	return (rc);
 }
 
-
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/common/common.h
--- a/head/sys/dev/cxgbe/common/common.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/common/common.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/common/common.h 228561 2011-12-16 02:09:51Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/common/common.h 237436 2012-06-22 07:51:15Z np $
  *
  */
 
@@ -38,6 +38,8 @@
 	SERNUM_LEN     = 24,    /* Serial # length */
 	EC_LEN         = 16,    /* E/C length */
 	ID_LEN         = 16,    /* ID length */
+	PN_LEN         = 16,    /* Part Number length */
+	MACADDR_LEN    = 12,    /* MAC Address length */
 };
 
 enum { MEM_EDC0, MEM_EDC1, MEM_MC };
@@ -62,8 +64,8 @@
 };
 
 #define FW_VERSION_MAJOR 1
-#define FW_VERSION_MINOR 4
-#define FW_VERSION_MICRO 16
+#define FW_VERSION_MINOR 5
+#define FW_VERSION_MICRO 2
 
 struct port_stats {
 	u64 tx_octets;            /* total # of octets in good frames */
@@ -219,6 +221,8 @@
 	u8 ec[EC_LEN + 1];
 	u8 sn[SERNUM_LEN + 1];
 	u8 id[ID_LEN + 1];
+	u8 pn[PN_LEN + 1];
+	u8 na[MACADDR_LEN + 1];
 };
 
 struct pci_params {
@@ -356,6 +360,8 @@
 		       unsigned int data_reg, const u32 *vals,
 		       unsigned int nregs, unsigned int start_idx);
 
+u32 t4_hw_pci_read_cfg4(adapter_t *adapter, int reg);
+
 struct fw_filter_wr;
 
 void t4_intr_enable(struct adapter *adapter);
@@ -374,7 +380,7 @@
 int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords,
 		  u32 *data, int byte_oriented);
 int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size);
-int t4_load_boot(struct adapter *adap, const u8 *boot_data,
+int t4_load_boot(struct adapter *adap, u8 *boot_data,
                  unsigned int boot_addr, unsigned int size);
 unsigned int t4_flash_cfg_addr(struct adapter *adapter);
 int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size);
@@ -431,6 +437,9 @@
 		__be32 *data);
 
 void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p);
+void t4_get_port_stats_offset(struct adapter *adap, int idx,
+		struct port_stats *stats,
+		struct port_stats *offset);
 void t4_get_lb_stats(struct adapter *adap, int idx, struct lb_port_stats *p);
 void t4_clr_port_stats(struct adapter *adap, int idx);
 
@@ -472,6 +481,10 @@
 		enum dev_master master, enum dev_state *state);
 int t4_fw_bye(struct adapter *adap, unsigned int mbox);
 int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force);
+int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
+		  const u8 *fw_data, unsigned int size, int force);
 int t4_fw_initialize(struct adapter *adap, unsigned int mbox);
 int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf,
 		    unsigned int vf, unsigned int nparams, const u32 *params,
@@ -484,6 +497,10 @@
 		unsigned int rxqi, unsigned int rxq, unsigned int tc,
 		unsigned int vi, unsigned int cmask, unsigned int pmask,
 		unsigned int exactf, unsigned int rcaps, unsigned int wxcaps);
+int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
+		     unsigned int port, unsigned int pf, unsigned int vf,
+		     unsigned int nmac, u8 *mac, unsigned int *rss_size,
+		     unsigned int portfunc, unsigned int idstype);
 int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
 		unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
 		unsigned int *rss_size);
@@ -524,5 +541,10 @@
 		   enum ctxt_type ctype, u32 *data);
 int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, enum ctxt_type ctype,
 		      u32 *data);
+int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox);
 int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl);
+int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val);
+int t4_config_scheduler(struct adapter *adapter, int mode, int level, int pktsize,
+                        int sched_class, int port, int rate, int unit,
+			int weight, int minrate, int maxrate);
 #endif /* __CHELSIO_COMMON_H */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/common/t4_hw.c
--- a/head/sys/dev/cxgbe/common/t4_hw.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/common/t4_hw.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2011 Chelsio Communications, Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,9 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgbe/common/t4_hw.c 231592 2012-02-13 18:41:32Z np $");
+__FBSDID("$FreeBSD: head/sys/dev/cxgbe/common/t4_hw.c 237436 2012-06-22 07:51:15Z np $");
+
+#include "opt_inet.h"
 
 #include "common.h"
 #include "t4_regs.h"
@@ -133,6 +135,20 @@
 }
 
 /*
+ * Read a 32-bit PCI Configuration Space register via the PCI-E backdoor
+ * mechanism.  This guarantees that we get the real value even if we're
+ * operating within a Virtual Machine and the Hypervisor is trapping our
+ * Configuration Space accesses.
+ */
+u32 t4_hw_pci_read_cfg4(adapter_t *adap, int reg)
+{
+	t4_write_reg(adap, A_PCIE_CFG_SPACE_REQ,
+		     F_ENABLE | F_LOCALCFG | V_FUNCTION(adap->pf) |
+		     V_REGISTER(reg));
+	return t4_read_reg(adap, A_PCIE_CFG_SPACE_DATA);
+}
+
+/*
  * Get the reply to a mailbox command and store it in @rpl in big-endian order.
  */
 static void get_mbox_rpl(struct adapter *adap, __be64 *rpl, int nflit,
@@ -194,7 +210,6 @@
 	u64 res;
 	int i, ms, delay_idx;
 	const __be64 *p = cmd;
-
 	u32 data_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_DATA);
 	u32 ctl_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_CTRL);
 
@@ -281,7 +296,7 @@
 #define MC_DATA(i) MC_BIST_STATUS_REG(A_MC_BIST_STATUS_RDATA, i)
 
 	for (i = 15; i >= 0; i--)
-		*data++ = htonl(t4_read_reg(adap, MC_DATA(i)));
+		*data++ = ntohl(t4_read_reg(adap, MC_DATA(i)));
 	if (ecc)
 		*ecc = t4_read_reg64(adap, MC_DATA(16));
 #undef MC_DATA
@@ -319,7 +334,7 @@
 #define EDC_DATA(i) (EDC_BIST_STATUS_REG(A_EDC_BIST_STATUS_RDATA, i) + idx)
 
 	for (i = 15; i >= 0; i--)
-		*data++ = htonl(t4_read_reg(adap, EDC_DATA(i)));
+		*data++ = ntohl(t4_read_reg(adap, EDC_DATA(i)));
 	if (ecc)
 		*ecc = t4_read_reg64(adap, EDC_DATA(16));
 #undef EDC_DATA
@@ -564,7 +579,7 @@
 static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
 {
 	int i, ret, addr;
-	int ec, sn;
+	int ec, sn, pn, na;
 	u8 vpd[VPD_LEN], csum;
 	const struct t4_vpd_hdr *v;
 
@@ -600,6 +615,8 @@
 	}
 	FIND_VPD_KW(ec, "EC");
 	FIND_VPD_KW(sn, "SN");
+	FIND_VPD_KW(pn, "PN");
+	FIND_VPD_KW(na, "NA");
 #undef FIND_VPD_KW
 
 	memcpy(p->id, v->id_data, ID_LEN);
@@ -609,6 +626,10 @@
 	i = vpd[sn - VPD_INFO_FLD_HDR_SIZE + 2];
 	memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN));
 	strstrip(p->sn);
+	memcpy(p->pn, vpd + pn, min(i, PN_LEN));
+	strstrip((char *)p->pn);
+	memcpy(p->na, vpd + na, min(i, MACADDR_LEN));
+	strstrip((char *)p->na);
 
 	return 0;
 }
@@ -952,7 +973,7 @@
 	if (ret || size == 0)
 		goto out;
 
-        /* this will write to the flash up to SF_PAGE_SIZE at a time */
+	/* this will write to the flash up to SF_PAGE_SIZE at a time */
 	for (i = 0; i< size; i+= SF_PAGE_SIZE) {
 		if ( (size - i) <  SF_PAGE_SIZE) 
 			n = size - i;
@@ -1054,42 +1075,209 @@
 	return ret;
 }
 
-/* BIOS boot header */
-typedef struct boot_header_s {
-	u8	signature[2];	/* signature */
-	u8	length;		/* image length (include header) */
-	u8	offset[4];	/* initialization vector */
-	u8	reserved[19];	/* reserved */
-	u8	exheader[2];	/* offset to expansion header */
-} boot_header_t;
-
+/* BIOS boot headers */
+typedef struct pci_expansion_rom_header {
+	u8	signature[2]; /* ROM Signature. Should be 0xaa55 */
+	u8	reserved[22]; /* Reserved per processor Architecture data */
+	u8	pcir_offset[2]; /* Offset to PCI Data Structure */
+} pci_exp_rom_header_t; /* PCI_EXPANSION_ROM_HEADER */
+
+/* Legacy PCI Expansion ROM Header */
+typedef struct legacy_pci_expansion_rom_header {
+	u8	signature[2]; /* ROM Signature. Should be 0xaa55 */
+	u8	size512; /* Current Image Size in units of 512 bytes */
+	u8	initentry_point[4];
+	u8	cksum; /* Checksum computed on the entire Image */
+	u8	reserved[16]; /* Reserved */
+	u8	pcir_offset[2]; /* Offset to PCI Data Structure */
+} legacy_pci_exp_rom_header_t; /* LEGACY_PCI_EXPANSION_ROM_HEADER */
+
+/* EFI PCI Expansion ROM Header */
+typedef struct efi_pci_expansion_rom_header {
+	u8	signature[2]; /* ROM signature. The value 0xaa55 */
+	u8	initialization_size[2]; /* Units 512. Includes this header */
+	u8	efi_signature[4]; /* Signature from EFI image header. 0x0EF1 */
+	u8	efi_subsystem[2]; /* Subsystem value for EFI image header */
+	u8	efi_machine_type[2]; /* Machine type from EFI image header */
+	u8	compression_type[2]; /* Compression type. */
+		/* 
+		 * Compression type definition
+		 * 0x0: uncompressed
+		 * 0x1: Compressed
+		 * 0x2-0xFFFF: Reserved
+		 */
+	u8	reserved[8]; /* Reserved */
+	u8	efi_image_header_offset[2]; /* Offset to EFI Image */
+	u8	pcir_offset[2]; /* Offset to PCI Data Structure */
+} efi_pci_exp_rom_header_t; /* EFI PCI Expansion ROM Header */
+
+/* PCI Data Structure Format */
+typedef struct pcir_data_structure { /* PCI Data Structure */
+	u8	signature[4]; /* Signature. The string "PCIR" */
+	u8	vendor_id[2]; /* Vendor Identification */
+	u8	device_id[2]; /* Device Identification */
+	u8	vital_product[2]; /* Pointer to Vital Product Data */
+	u8	length[2]; /* PCIR Data Structure Length */
+	u8	revision; /* PCIR Data Structure Revision */
+	u8	class_code[3]; /* Class Code */
+	u8	image_length[2]; /* Image Length. Multiple of 512B */
+	u8	code_revision[2]; /* Revision Level of Code/Data */
+	u8	code_type; /* Code Type. */
+		/*
+		 * PCI Expansion ROM Code Types
+		 * 0x00: Intel IA-32, PC-AT compatible. Legacy
+		 * 0x01: Open Firmware standard for PCI. FCODE
+		 * 0x02: Hewlett-Packard PA RISC. HP reserved
+		 * 0x03: EFI Image. EFI
+		 * 0x04-0xFF: Reserved.
+		 */
+	u8	indicator; /* Indicator. Identifies the last image in the ROM */
+	u8	reserved[2]; /* Reserved */
+} pcir_data_t; /* PCI__DATA_STRUCTURE */
+
+/* BOOT constants */
 enum {
 	BOOT_FLASH_BOOT_ADDR = 0x0,/* start address of boot image in flash */
 	BOOT_SIGNATURE = 0xaa55,   /* signature of BIOS boot ROM */
 	BOOT_SIZE_INC = 512,       /* image size measured in 512B chunks */
-	BOOT_MIN_SIZE = sizeof(boot_header_t), /* at least basic header */
-	BOOT_MAX_SIZE = 1024*BOOT_SIZE_INC /* 1 byte * length increment  */
+	BOOT_MIN_SIZE = sizeof(pci_exp_rom_header_t), /* basic header */
+	BOOT_MAX_SIZE = 1024*BOOT_SIZE_INC, /* 1 byte * length increment  */
+	VENDOR_ID = 0x1425, /* Vendor ID */
+	PCIR_SIGNATURE = 0x52494350 /* PCIR signature */
 };
 
 /*
+ *	modify_device_id - Modifies the device ID of the Boot BIOS image 
+ *	@adatper: the device ID to write.
+ *	@boot_data: the boot image to modify.
+ *
+ *	Write the supplied device ID to the boot BIOS image.
+ */
+static void modify_device_id(int device_id, u8 *boot_data)
+{
+	legacy_pci_exp_rom_header_t *header;
+	pcir_data_t *pcir_header;
+	u32 cur_header = 0;
+
+	/*
+	 * Loop through all chained images and change the device ID's
+	 */
+	while (1) {
+		header = (legacy_pci_exp_rom_header_t *) &boot_data[cur_header];
+		pcir_header = (pcir_data_t *) &boot_data[cur_header +
+		    le16_to_cpu(*(u16*)header->pcir_offset)];
+
+		/*
+		 * Only modify the Device ID if code type is Legacy or HP.
+		 * 0x00: Okay to modify
+		 * 0x01: FCODE. Do not modify
+		 * 0x03: Okay to modify
+		 * 0x04-0xFF: Do not modify
+		 */
+		if (pcir_header->code_type == 0x00) {
+			u8 csum = 0;
+			int i;
+
+			/*
+			 * Modify Device ID to match current adapter
+			 */
+			*(u16*) pcir_header->device_id = device_id;
+
+			/*
+			 * Set checksum temporarily to 0.
+			 * We will recalculate it later.
+			 */
+			header->cksum = 0x0;
+
+			/*
+			 * Calculate and update checksum
+			 */
+			for (i = 0; i < (header->size512 * 512); i++)
+				csum += (u8)boot_data[cur_header + i];
+
+			/*
+			 * Invert summed value to create the checksum
+			 * Writing new checksum value directly to the boot data
+			 */
+			boot_data[cur_header + 7] = -csum;
+
+		} else if (pcir_header->code_type == 0x03) {
+
+			/*
+			 * Modify Device ID to match current adapter
+			 */
+			*(u16*) pcir_header->device_id = device_id;
+
+		}
+
+
+		/*
+		 * Check indicator element to identify if this is the last
+		 * image in the ROM.
+		 */
+		if (pcir_header->indicator & 0x80)
+			break;
+
+		/*
+		 * Move header pointer up to the next image in the ROM.
+		 */
+		cur_header += header->size512 * 512;
+	}
+}
+
+/*
  *	t4_load_boot - download boot flash
  *	@adapter: the adapter
  *	@boot_data: the boot image to write
+ *	@boot_addr: offset in flash to write boot_data
  *	@size: image size
  *
  *	Write the supplied boot image to the card's serial flash.
  *	The boot image has the following sections: a 28-byte header and the
  *	boot image.
  */
-int t4_load_boot(struct adapter *adap, const u8 *boot_data, 
+int t4_load_boot(struct adapter *adap, u8 *boot_data, 
 		 unsigned int boot_addr, unsigned int size)
 {
+	pci_exp_rom_header_t *header;
+	int pcir_offset ;
+	pcir_data_t *pcir_header;
 	int ret, addr;
+	uint16_t device_id;
 	unsigned int i;
 	unsigned int boot_sector = boot_addr * 1024;
 	unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
 
 	/*
+	 * Make sure the boot image does not encroach on the firmware region
+	 */
+	if ((boot_sector + size) >> 16 > FLASH_FW_START_SEC) {
+		CH_ERR(adap, "boot image encroaching on firmware region\n");
+		return -EFBIG;
+	}
+
+	/*
+	 * Number of sectors spanned
+	 */
+	i = DIV_ROUND_UP(size ? size : FLASH_BOOTCFG_MAX_SIZE,
+			sf_sec_size);
+	ret = t4_flash_erase_sectors(adap, boot_sector >> 16,
+				     (boot_sector >> 16) + i - 1);
+
+	/*
+	 * If size == 0 then we're simply erasing the FLASH sectors associated
+	 * with the on-adapter option ROM file
+	 */
+	if (ret || (size == 0))
+		goto out;
+
+	/* Get boot header */
+	header = (pci_exp_rom_header_t *)boot_data;
+	pcir_offset = le16_to_cpu(*(u16 *)header->pcir_offset);
+	/* PCIR Data Structure */
+	pcir_header = (pcir_data_t *) &boot_data[pcir_offset];
+
+	/*
 	 * Perform some primitive sanity testing to avoid accidentally
 	 * writing garbage over the boot sectors.  We ought to check for
 	 * more but it's not worth it for now ...
@@ -1100,18 +1288,46 @@
 	}
 
 	/*
-	 * Make sure the boot image does not encroach on the firmware region
+	 * Check BOOT ROM header signature
 	 */
-	if ((boot_sector + size) >> 16 > FLASH_FW_START_SEC) {
-		CH_ERR(adap, "boot image encroaching on firmware region\n");
-		return -EFBIG;
+	if (le16_to_cpu(*(u16*)header->signature) != BOOT_SIGNATURE ) {
+		CH_ERR(adap, "Boot image missing signature\n");
+		return -EINVAL;
 	}
 
-	i = DIV_ROUND_UP(size, sf_sec_size);        /* # of sectors spanned */
-	ret = t4_flash_erase_sectors(adap, boot_sector >> 16, 
-				     (boot_sector >> 16) + i - 1);
-	if (ret)
-		goto out;
+	/*
+	 * Check PCI header signature
+	 */
+	if (le32_to_cpu(*(u32*)pcir_header->signature) != PCIR_SIGNATURE) {
+		CH_ERR(adap, "PCI header missing signature\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Check Vendor ID matches Chelsio ID
+	 */
+	if (le16_to_cpu(*(u16*)pcir_header->vendor_id) != VENDOR_ID) {
+		CH_ERR(adap, "Vendor ID missing signature\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Retrieve adapter's device ID
+	 */
+	t4_os_pci_read_cfg2(adap, PCI_DEVICE_ID, &device_id);
+	/* Want to deal with PF 0 so I strip off PF 4 indicator */
+	device_id = (device_id & 0xff) | 0x4000;
+
+	/*
+	 * Check PCIE Device ID
+	 */
+	if (le16_to_cpu(*(u16*)pcir_header->device_id) != device_id) {
+		/*
+		 * Change the device ID in the Boot BIOS image to match
+		 * the Device ID of the current adapter.
+		 */
+		modify_device_id(device_id, boot_data);
+	}
 
 	/*
 	 * Skip over the first SF_PAGE_SIZE worth of data and write it after
@@ -3204,7 +3420,11 @@
  *	@enable: whether to enable or disable the filter
  *
  *	Configures one of the tracing filters available in HW.  If @enable is
- *	%0 @tp is not examined and may be %NULL.
+ *	%0 @tp is not examined and may be %NULL. The user is responsible to
+ *	set the single/multiple trace mode by writing to A_MPS_TRC_CFG register
+ *	by using "cxgbtool iface reg reg_addr=val" command. See t4_sniffer/
+ *	docs/readme.txt for a complete description of how to set up tracing on
+ *	T4.
  */
 int t4_set_trace_filter(struct adapter *adap, const struct trace_params *tp, int idx,
 			int enable)
@@ -3215,45 +3435,45 @@
 
 	if (!enable) {
 		t4_write_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, 0);
-		goto out;
+		return 0;
 	}
 
-	if (tp->port > 11 || tp->invert > 1 || tp->skip_len > M_TFLENGTH ||
-	    tp->skip_ofst > M_TFOFFSET || tp->min_len > M_TFMINPKTSIZE ||
-	    tp->snap_len > 9600 || (idx && tp->snap_len > 256))
-		return -EINVAL;
-
-	if (tp->snap_len > 256) {            /* must be tracer 0 */
-		if ((t4_read_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + 4) |
-		     t4_read_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + 8) |
-		     t4_read_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + 12)) &
-		    F_TFEN)
-			return -EINVAL;  /* other tracers are enabled */
+	/*
+	 * TODO - After T4 data book is updated, specify the exact
+	 * section below.
+	 *
+	 * See T4 data book - MPS section for a complete description 
+	 * of the below if..else handling of A_MPS_TRC_CFG register 
+	 * value.
+	 */ 
+	cfg = t4_read_reg(adap, A_MPS_TRC_CFG);
+	if (cfg & F_TRCMULTIFILTER) {
+		/*
+		 * If multiple tracers are enabled, then maximum
+		 * capture size is 2.5KB (FIFO size of a single channel)
+		 * minus 2 flits for CPL_TRACE_PKT header.
+		 */
+		if (tp->snap_len > ((10 * 1024 / 4) - (2 * 8)))
+			return -EINVAL;		
+	}
+	else {
+		/*
+		 * If multiple tracers are disabled, to avoid deadlocks 
+		 * maximum packet capture size of 9600 bytes is recommended.
+		 * Also in this mode, only trace0 can be enabled and running.
+		 */
 		multitrc = 0;
-	} else if (idx) {
-		i = t4_read_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_B);
-		if (G_TFCAPTUREMAX(i) > 256 &&
-		    (t4_read_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A) & F_TFEN))
+		if (tp->snap_len > 9600 || idx)
 			return -EINVAL;
 	}
 
+	if (tp->port > 11 || tp->invert > 1 || tp->skip_len > M_TFLENGTH ||
+	    tp->skip_ofst > M_TFOFFSET || tp->min_len > M_TFMINPKTSIZE)
+		return -EINVAL;
+
 	/* stop the tracer we'll be changing */
 	t4_write_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, 0);
 
-	/* disable tracing globally if running in the wrong single/multi mode */
-	cfg = t4_read_reg(adap, A_MPS_TRC_CFG);
-	if ((cfg & F_TRCEN) && multitrc != (cfg & F_TRCMULTIFILTER)) {
-		t4_write_reg(adap, A_MPS_TRC_CFG, cfg ^ F_TRCEN);
-		t4_read_reg(adap, A_MPS_TRC_CFG);                  /* flush */
-		msleep(1);
-		if (!(t4_read_reg(adap, A_MPS_TRC_CFG) & F_TRCFIFOEMPTY))
-			return -ETIMEDOUT;
-	}
-	/*
-	 * At this point either the tracing is enabled and in the right mode or
-	 * disabled.
-	 */
-
 	idx *= (A_MPS_TRC_FILTER1_MATCH - A_MPS_TRC_FILTER0_MATCH);
 	data_reg = A_MPS_TRC_FILTER0_MATCH + idx;
 	mask_reg = A_MPS_TRC_FILTER0_DONT_CARE + idx;
@@ -3269,9 +3489,6 @@
 		     V_TFOFFSET(tp->skip_ofst) | V_TFLENGTH(tp->skip_len) |
 		     V_TFPORT(tp->port) | F_TFEN | V_TFINVERTMATCH(tp->invert));
 
-	cfg &= ~F_TRCMULTIFILTER;
-	t4_write_reg(adap, A_MPS_TRC_CFG, cfg | F_TRCEN | multitrc);
-out:	t4_read_reg(adap, A_MPS_TRC_CFG);  /* flush */
 	return 0;
 }
 
@@ -3371,6 +3588,28 @@
 }
 
 /**
+ *      t4_get_port_stats_offset - collect port stats relative to a previous
+ *                                 snapshot
+ *      @adap: The adapter
+ *      @idx: The port
+ *      @stats: Current stats to fill
+ *      @offset: Previous stats snapshot
+ */
+void t4_get_port_stats_offset(struct adapter *adap, int idx,
+		struct port_stats *stats,
+		struct port_stats *offset)
+{
+	u64 *s, *o;
+	int i;
+
+	t4_get_port_stats(adap, idx, stats);
+	for (i = 0, s = (u64 *)stats, o = (u64 *)offset ;
+			i < (sizeof(struct port_stats)/sizeof(u64)) ;
+			i++, s++, o++)
+		*s -= *o;
+}
+
+/**
  *	t4_get_port_stats - collect port statistics
  *	@adap: the adapter
  *	@idx: the port index
@@ -3631,6 +3870,20 @@
 	(var).retval_len16 = htonl(FW_LEN16(var)); \
 } while (0)
 
+int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val)
+{
+	struct fw_ldst_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST |
+		F_FW_CMD_WRITE | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE));
+	c.cycles_to_len16 = htonl(FW_LEN16(c));
+	c.u.addrval.addr = htonl(addr);
+	c.u.addrval.val = htonl(val);
+
+	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+}
+
 /**
  *	t4_mdio_rd - read a PHY register through MDIO
  *	@adap: the adapter
@@ -3691,6 +3944,30 @@
 }
 
 /**
+ *	t4_sge_ctxt_flush - flush the SGE context cache
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *
+ *	Issues a FW command through the given mailbox to flush the
+ *	SGE context cache.
+ */
+int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox)
+{
+	int ret;
+	struct fw_ldst_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST |
+			F_FW_CMD_READ |
+			V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_SGE_EGRC));
+	c.cycles_to_len16 = htonl(FW_LEN16(c));
+	c.u.idctxt.msg_ctxtflush = htonl(F_FW_LDST_CMD_CTXTFLUSH);
+
+	ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
+	return ret;
+}
+
+/**
  *	t4_sge_ctxt_rd - read an SGE context through FW
  *	@adap: the adapter
  *	@mbox: mailbox to use for the FW command
@@ -3762,9 +4039,10 @@
  *	@mbox: mailbox to use for the FW command
  *	@evt_mbox: mailbox to receive async FW events
  *	@master: specifies the caller's willingness to be the device master
- *	@state: returns the current device state
+ *	@state: returns the current device state (if non-NULL)
  *
- *	Issues a command to establish communication with FW.
+ *	Issues a command to establish communication with FW.  Returns either
+ *	an error (negative integer) or the mailbox of the Master PF.
  */
 int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 		enum dev_master master, enum dev_state *state)
@@ -3916,6 +4194,175 @@
 }
 
 /**
+ *	t4_fw_halt - issue a reset/halt to FW and put uP into RESET
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW RESET command (if desired)
+ *	@force: force uP into RESET even if FW RESET command fails
+ *
+ *	Issues a RESET command to firmware (if desired) with a HALT indication
+ *	and then puts the microprocessor into RESET state.  The RESET command
+ *	will only be issued if a legitimate mailbox is provided (mbox <=
+ *	M_PCIE_FW_MASTER).
+ *
+ *	This is generally used in order for the host to safely manipulate the
+ *	adapter without fear of conflicting with whatever the firmware might
+ *	be doing.  The only way out of this state is to RESTART the firmware
+ *	...
+ */
+int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force)
+{
+	int ret = 0;
+
+	/*
+	 * If a legitimate mailbox is provided, issue a RESET command
+	 * with a HALT indication.
+	 */
+	if (mbox <= M_PCIE_FW_MASTER) {
+		struct fw_reset_cmd c;
+
+		memset(&c, 0, sizeof(c));
+		INIT_CMD(c, RESET, WRITE);
+		c.val = htonl(F_PIORST | F_PIORSTMODE);
+		c.halt_pkd = htonl(F_FW_RESET_CMD_HALT);
+		ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+	}
+
+	/*
+	 * Normally we won't complete the operation if the firmware RESET
+	 * command fails but if our caller insists we'll go ahead and put the
+	 * uP into RESET.  This can be useful if the firmware is hung or even
+	 * missing ...  We'll have to take the risk of putting the uP into
+	 * RESET without the cooperation of firmware in that case.
+	 *
+	 * We also force the firmware's HALT flag to be on in case we bypassed
+	 * the firmware RESET command above or we're dealing with old firmware
+	 * which doesn't have the HALT capability.  This will serve as a flag
+	 * for the incoming firmware to know that it's coming out of a HALT
+	 * rather than a RESET ... if it's new enough to understand that ...
+	 */
+	if (ret == 0 || force) {
+		t4_set_reg_field(adap, A_CIM_BOOT_CFG, F_UPCRST, F_UPCRST);
+		t4_set_reg_field(adap, A_PCIE_FW, F_PCIE_FW_HALT, F_PCIE_FW_HALT);
+	}
+
+	/*
+	 * And we always return the result of the firmware RESET command
+	 * even when we force the uP into RESET ...
+	 */
+	return ret;
+}
+
+/**
+ *	t4_fw_restart - restart the firmware by taking the uP out of RESET
+ *	@adap: the adapter
+ *	@reset: if we want to do a RESET to restart things
+ *
+ *	Restart firmware previously halted by t4_fw_halt().  On successful
+ *	return the previous PF Master remains as the new PF Master and there
+ *	is no need to issue a new HELLO command, etc.
+ *
+ *	We do this in two ways:
+ *
+ *	 1. If we're dealing with newer firmware we'll simply want to take
+ *	    the chip's microprocessor out of RESET.  This will cause the
+ *	    firmware to start up from its start vector.  And then we'll loop
+ *	    until the firmware indicates it's started again (PCIE_FW.HALT
+ *	    reset to 0) or we timeout.
+ *
+ *	 2. If we're dealing with older firmware then we'll need to RESET
+ *	    the chip since older firmware won't recognize the PCIE_FW.HALT
+ *	    flag and automatically RESET itself on startup.
+ */
+int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset)
+{
+	if (reset) {
+		/*
+		 * Since we're directing the RESET instead of the firmware
+		 * doing it automatically, we need to clear the PCIE_FW.HALT
+		 * bit.
+		 */
+		t4_set_reg_field(adap, A_PCIE_FW, F_PCIE_FW_HALT, 0);
+
+		/*
+		 * If we've been given a valid mailbox, first try to get the
+		 * firmware to do the RESET.  If that works, great and we can
+		 * return success.  Otherwise, if we haven't been given a
+		 * valid mailbox or the RESET command failed, fall back to
+		 * hitting the chip with a hammer.
+		 */
+		if (mbox <= M_PCIE_FW_MASTER) {
+			t4_set_reg_field(adap, A_CIM_BOOT_CFG, F_UPCRST, 0);
+			msleep(100);
+			if (t4_fw_reset(adap, mbox,
+					F_PIORST | F_PIORSTMODE) == 0)
+				return 0;
+		}
+
+		t4_write_reg(adap, A_PL_RST, F_PIORST | F_PIORSTMODE);
+		msleep(2000);
+	} else {
+		int ms;
+
+		t4_set_reg_field(adap, A_CIM_BOOT_CFG, F_UPCRST, 0);
+		for (ms = 0; ms < FW_CMD_MAX_TIMEOUT; ) {
+			if (!(t4_read_reg(adap, A_PCIE_FW) & F_PCIE_FW_HALT))
+				return FW_SUCCESS;
+			msleep(100);
+			ms += 100;
+		}
+		return -ETIMEDOUT;
+	}
+	return 0;
+}
+
+/**
+ *	t4_fw_upgrade - perform all of the steps necessary to upgrade FW
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW RESET command (if desired)
+ *	@fw_data: the firmware image to write
+ *	@size: image size
+ *	@force: force upgrade even if firmware doesn't cooperate
+ *
+ *	Perform all of the steps necessary for upgrading an adapter's
+ *	firmware image.  Normally this requires the cooperation of the
+ *	existing firmware in order to halt all existing activities
+ *	but if an invalid mailbox token is passed in we skip that step
+ *	(though we'll still put the adapter microprocessor into RESET in
+ *	that case).
+ *
+ *	On successful return the new firmware will have been loaded and
+ *	the adapter will have been fully RESET losing all previous setup
+ *	state.  On unsuccessful return the adapter may be completely hosed ...
+ *	positive errno indicates that the adapter is ~probably~ intact, a
+ *	negative errno indicates that things are looking bad ...
+ */
+int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
+		  const u8 *fw_data, unsigned int size, int force)
+{
+	const struct fw_hdr *fw_hdr = (const struct fw_hdr *)fw_data;
+	int reset, ret;
+
+	ret = t4_fw_halt(adap, mbox, force);
+	if (ret < 0 && !force)
+		return ret;
+
+	ret = t4_load_fw(adap, fw_data, size);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Older versions of the firmware don't understand the new
+	 * PCIE_FW.HALT flag and so won't know to perform a RESET when they
+	 * restart.  So for newly loaded older firmware we'll have to do the
+	 * RESET for it so it starts up on a clean slate.  We can tell if
+	 * the newly loaded firmware will handle this right by checking
+	 * its header flags to see if it advertises the capability.
+	 */
+	reset = ((ntohl(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0);
+	return t4_fw_restart(adap, mbox, reset);
+}
+
+/**
  *	t4_fw_initialize - ask FW to initialize the device
  *	@adap: the adapter
  *	@mbox: mailbox to use for the FW command
@@ -4057,7 +4504,7 @@
 }
 
 /**
- *	t4_alloc_vi - allocate a virtual interface
+ *	t4_alloc_vi_func - allocate a virtual interface
  *	@adap: the adapter
  *	@mbox: mailbox to use for the FW command
  *	@port: physical port associated with the VI
@@ -4066,6 +4513,8 @@
  *	@nmac: number of MAC addresses needed (1 to 5)
  *	@mac: the MAC addresses of the VI
  *	@rss_size: size of RSS table slice associated with this VI
+ *	@portfunc: which Port Application Function MAC Address is desired
+ *	@idstype: Intrusion Detection Type
  *
  *	Allocates a virtual interface for the given physical port.  If @mac is
  *	not %NULL it contains the MAC addresses of the VI as assigned by FW.
@@ -4073,9 +4522,10 @@
  *	stored consecutively so the space needed is @nmac * 6 bytes.
  *	Returns a negative error number or the non-negative VI id.
  */
-int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
-		unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
-		unsigned int *rss_size)
+int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
+		     unsigned int port, unsigned int pf, unsigned int vf,
+		     unsigned int nmac, u8 *mac, unsigned int *rss_size,
+		     unsigned int portfunc, unsigned int idstype)
 {
 	int ret;
 	struct fw_vi_cmd c;
@@ -4085,6 +4535,8 @@
 			    F_FW_CMD_WRITE | F_FW_CMD_EXEC |
 			    V_FW_VI_CMD_PFN(pf) | V_FW_VI_CMD_VFN(vf));
 	c.alloc_to_len16 = htonl(F_FW_VI_CMD_ALLOC | FW_LEN16(c));
+	c.type_to_viid = htons(V_FW_VI_CMD_TYPE(idstype) |
+			       V_FW_VI_CMD_FUNC(portfunc));
 	c.portid_pkd = V_FW_VI_CMD_PORTID(port);
 	c.nmac = nmac - 1;
 
@@ -4107,7 +4559,30 @@
 	}
 	if (rss_size)
 		*rss_size = G_FW_VI_CMD_RSSSIZE(ntohs(c.rsssize_pkd));
-	return G_FW_VI_CMD_VIID(ntohs(c.type_to_viid));
+	return G_FW_VI_CMD_VIID(htons(c.type_to_viid));
+}
+
+/**
+ *	t4_alloc_vi - allocate an [Ethernet Function] virtual interface
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *	@port: physical port associated with the VI
+ *	@pf: the PF owning the VI
+ *	@vf: the VF owning the VI
+ *	@nmac: number of MAC addresses needed (1 to 5)
+ *	@mac: the MAC addresses of the VI
+ *	@rss_size: size of RSS table slice associated with this VI
+ *
+ *	backwards-compatible convenience routine to allocate a Virtual
+ *	Interface with an Ethernet Port Application Function and Intrusion
+ *	Detection System disabled.
+ */
+int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
+		unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
+		unsigned int *rss_size)
+{
+	return t4_alloc_vi_func(adap, mbox, port, pf, vf, nmac, mac, rss_size,
+				FW_VI_FUNC_ETH, 0);
 }
 
 /**
@@ -4212,7 +4687,7 @@
 	unsigned int nfilters = 0;
 	unsigned int rem = naddr;
 
-	if (naddr > FW_CLS_TCAM_NUM_ENTRIES)
+	if (naddr > NUM_MPS_CLS_SRAM_L_INSTANCES)
 		return -EINVAL;
 
 	for (offset = 0; offset < naddr ; /**/) {
@@ -4253,10 +4728,10 @@
 			u16 index = G_FW_VI_MAC_CMD_IDX(ntohs(p->valid_to_idx));
 
 			if (idx)
-				idx[offset+i] = (index >= FW_CLS_TCAM_NUM_ENTRIES
+				idx[offset+i] = (index >= NUM_MPS_CLS_SRAM_L_INSTANCES
 						 ? 0xffff
 						 : index);
-			if (index < FW_CLS_TCAM_NUM_ENTRIES)
+			if (index < NUM_MPS_CLS_SRAM_L_INSTANCES)
 				nfilters++;
 			else if (hash)
 				*hash |= (1ULL << hash_mac_addr(addr[offset+i]));
@@ -4317,7 +4792,7 @@
 	ret = t4_wr_mbox_ns(adap, mbox, &c, sizeof(c), &c);
 	if (ret == 0) {
 		ret = G_FW_VI_MAC_CMD_IDX(ntohs(p->valid_to_idx));
-		if (ret >= FW_CLS_TCAM_NUM_ENTRIES)
+		if (ret >= NUM_MPS_CLS_SRAM_L_INSTANCES)
 			ret = -ENOMEM;
 	}
 	return ret;
@@ -4538,10 +5013,12 @@
 int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
 {
 	u8 opcode = *(const u8 *)rpl;
-
-	if (opcode == FW_PORT_CMD) {    /* link/module state change message */
+	const struct fw_port_cmd *p = (const void *)rpl;
+	unsigned int action = G_FW_PORT_CMD_ACTION(ntohl(p->action_to_len16));
+
+	if (opcode == FW_PORT_CMD && action == FW_PORT_ACTION_GET_PORT_INFO) {
+		/* link/module state change message */
 		int speed = 0, fc = 0, i;
-		const struct fw_port_cmd *p = (const void *)rpl;
 		int chan = G_FW_PORT_CMD_PORTID(ntohl(p->op_to_portid));
 		struct port_info *pi = NULL;
 		struct link_config *lc;
@@ -4578,6 +5055,10 @@
 			pi->mod_type = mod;
 			t4_os_portmod_changed(adap, i);
 		}
+	} else {
+		CH_WARN_RATELIMIT(adap,
+		    "Unknown firmware reply 0x%x (0x%x)\n", opcode, action);
+		return -EINVAL;
 	}
 	return 0;
 }
@@ -4704,6 +5185,11 @@
 	get_pci_mode(adapter, &adapter->params.pci);
 
 	adapter->params.rev = t4_read_reg(adapter, A_PL_REV);
+	/* T4A1 chip is no longer supported */
+	if (adapter->params.rev == 1) {
+		CH_ALERT(adapter, "T4 rev 1 chip is no longer supported\n");
+		return -EINVAL;
+	}
 	adapter->params.pci.vpd_cap_addr =
 		t4_os_find_pci_capability(adapter, PCI_CAP_ID_VPD);
 
@@ -4715,9 +5201,11 @@
 	if (ret < 0)
 		return ret;
 
-	if (t4_read_reg(adapter, A_SGE_PC0_REQ_BIST_CMD) != 0xffffffff) {
+	if (t4_read_reg(adapter, A_PCIE_REVISION) != 0) {
+		/* FPGA */
 		adapter->params.cim_la_size = 2 * CIMLA_SIZE;
 	} else {
+		/* ASIC */
 		adapter->params.cim_la_size = CIMLA_SIZE;
 	}
 
@@ -4781,3 +5269,43 @@
 
 	return 0;
 }
+
+int t4_config_scheduler(struct adapter *adapter, int mode, int level,
+			int pktsize, int sched_class, int port, int unit,
+			int rate, int weight, int minrate, int maxrate)
+{
+	struct fw_sched_cmd cmd, rpl;
+
+	if (rate < 0 || unit < 0)
+		return -EINVAL;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_write = cpu_to_be32(V_FW_CMD_OP(FW_SCHED_CMD) |
+	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
+	cmd.retval_len16 = cpu_to_be32(V_FW_CMD_LEN16(sizeof(cmd)/16));
+
+	cmd.u.params.sc = 1;
+	cmd.u.params.level = level;
+	cmd.u.params.mode = mode;
+	cmd.u.params.ch = port;
+	cmd.u.params.cl = sched_class;
+	cmd.u.params.rate = rate;
+	cmd.u.params.unit = unit;
+
+ 	switch (level) {
+		case FW_SCHED_PARAMS_LEVEL_CH_WRR:
+		case FW_SCHED_PARAMS_LEVEL_CL_WRR:
+			cmd.u.params.weight = cpu_to_be16(weight);
+			break;
+		case FW_SCHED_PARAMS_LEVEL_CH_RL:
+		case FW_SCHED_PARAMS_LEVEL_CL_RL:
+			cmd.u.params.max = cpu_to_be32(maxrate);
+			cmd.u.params.min = cpu_to_be32(minrate);
+			cmd.u.params.pktsize = cpu_to_be16(pktsize);
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd), &rpl, 1);
+}
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/common/t4_msg.h
--- a/head/sys/dev/cxgbe/common/t4_msg.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/common/t4_msg.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/dev/cxgbe/common/t4_msg.h 237436 2012-06-22 07:51:15Z np $
  *
  */
 
@@ -37,7 +37,6 @@
 	CPL_SET_TCB           = 0x4,
 	CPL_SET_TCB_FIELD     = 0x5,
 	CPL_GET_TCB           = 0x6,
-	CPL_PCMD              = 0x7,
 	CPL_CLOSE_CON_REQ     = 0x8,
 	CPL_CLOSE_LISTSRV_REQ = 0x9,
 	CPL_ABORT_REQ         = 0xA,
@@ -52,9 +51,12 @@
 	CPL_L2T_READ_REQ      = 0x13,
 	CPL_SMT_WRITE_REQ     = 0x14,
 	CPL_SMT_READ_REQ      = 0x15,
+	CPL_TAG_WRITE_REQ     = 0x16,
 	CPL_BARRIER           = 0x18,
 	CPL_TID_RELEASE       = 0x1A,
-	CPL_RX_MPS_PKT        = 0x1B,
+	CPL_TAG_READ_REQ      = 0x1B,
+	CPL_TX_PKT_FSO        = 0x1E,
+	CPL_TX_PKT_ISO        = 0x1F,
 
 	CPL_CLOSE_LISTSRV_RPL = 0x20,
 	CPL_ERROR             = 0x21,
@@ -66,6 +68,7 @@
 	CPL_RTE_DELETE_RPL    = 0x27,
 	CPL_RTE_WRITE_RPL     = 0x28,
 	CPL_RX_URG_PKT        = 0x29,
+	CPL_TAG_WRITE_RPL     = 0x2A,
 	CPL_ABORT_REQ_RSS     = 0x2B,
 	CPL_RX_URG_NOTIFY     = 0x2C,
 	CPL_ABORT_RPL_RSS     = 0x2D,
@@ -84,7 +87,7 @@
 	CPL_RX_DATA           = 0x39,
 	CPL_SET_TCB_RPL       = 0x3A,
 	CPL_RX_PKT            = 0x3B,
-	CPL_PCMD_RPL          = 0x3C,
+	CPL_TAG_READ_RPL      = 0x3C,
 	CPL_HIT_NOTIFY        = 0x3D,
 	CPL_PKT_NOTIFY        = 0x3E,
 	CPL_RX_DDP_COMPLETE   = 0x3F,
@@ -97,23 +100,34 @@
 	CPL_RX2TX_PKT         = 0x45,
 	CPL_RX_FCOE_DDP       = 0x46,
 	CPL_FCOE_HDR          = 0x47,
+	CPL_T5_TRACE_PKT      = 0x48,
+	CPL_RX_ISCSI_DDP      = 0x49,
+	CPL_RX_FCOE_DIF       = 0x4A,
+	CPL_RX_DATA_DIF       = 0x4B,
 
 	CPL_RDMA_READ_REQ     = 0x60,
+	CPL_RX_ISCSI_DIF      = 0x60,
 
 	CPL_SET_LE_REQ        = 0x80,
 	CPL_PASS_OPEN_REQ6    = 0x81,
 	CPL_ACT_OPEN_REQ6     = 0x83,
 
-	CPL_TX_DMA_ACK        = 0xA0,
 	CPL_RDMA_TERMINATE    = 0xA2,
 	CPL_RDMA_WRITE        = 0xA4,
 	CPL_SGE_EGR_UPDATE    = 0xA5,
 	CPL_SET_LE_RPL        = 0xA6,
 	CPL_FW2_MSG           = 0xA7,
 	CPL_FW2_PLD           = 0xA8,
+	CPL_T5_RDMA_READ_REQ  = 0xA9,
+	CPL_RDMA_ATOMIC_REQ   = 0xAA,
+	CPL_RDMA_ATOMIC_RPL   = 0xAB,
+	CPL_RDMA_IMM_DATA     = 0xAC,
+	CPL_RDMA_IMM_DATA_SE  = 0xAD,
 
 	CPL_TRACE_PKT         = 0xB0,
 	CPL_RX2TX_DATA        = 0xB1,
+	CPL_ISCSI_DATA        = 0xB2,
+	CPL_FCOE_DATA         = 0xB3,
 
 	CPL_FW4_MSG           = 0xC0,
 	CPL_FW4_PLD           = 0xC1,
@@ -278,13 +292,13 @@
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u8 :4;
 	__u8 unknown:1;
-	__u8 :1;
+	__u8 ecn:1;
 	__u8 sack:1;
 	__u8 tstamp:1;
 #else
 	__u8 tstamp:1;
 	__u8 sack:1;
-	__u8 :1;
+	__u8 ecn:1;
 	__u8 unknown:1;
 	__u8 :4;
 #endif
@@ -625,7 +639,7 @@
 #define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN)
 
 #define S_ETH_HDR_LEN    26
-#define M_ETH_HDR_LEN    0x1F
+#define M_ETH_HDR_LEN    0x3F
 #define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN)
 #define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN)
 
@@ -663,34 +677,18 @@
 	__be32 opt2;
 };
 
-/* cpl_act_open_req.params fields XXX */
-#define S_AOPEN_VLAN_PRI    9
-#define M_AOPEN_VLAN_PRI    0x3
-#define V_AOPEN_VLAN_PRI(x) ((x) << S_AOPEN_VLAN_PRI)
-#define G_AOPEN_VLAN_PRI(x) (((x) >> S_AOPEN_VLAN_PRI) & M_AOPEN_VLAN_PRI)
-
-#define S_AOPEN_VLAN_PRI_VALID    11
-#define V_AOPEN_VLAN_PRI_VALID(x) ((x) << S_AOPEN_VLAN_PRI_VALID)
-#define F_AOPEN_VLAN_PRI_VALID    V_AOPEN_VLAN_PRI_VALID(1U)
-
-#define S_AOPEN_PKT_TYPE    12
-#define M_AOPEN_PKT_TYPE    0x3
-#define V_AOPEN_PKT_TYPE(x) ((x) << S_AOPEN_PKT_TYPE)
-#define G_AOPEN_PKT_TYPE(x) (((x) >> S_AOPEN_PKT_TYPE) & M_AOPEN_PKT_TYPE)
-
-#define S_AOPEN_MAC_MATCH    14
-#define M_AOPEN_MAC_MATCH    0x1F
-#define V_AOPEN_MAC_MATCH(x) ((x) << S_AOPEN_MAC_MATCH)
-#define G_AOPEN_MAC_MATCH(x) (((x) >> S_AOPEN_MAC_MATCH) & M_AOPEN_MAC_MATCH)
-
-#define S_AOPEN_MAC_MATCH_VALID    19
-#define V_AOPEN_MAC_MATCH_VALID(x) ((x) << S_AOPEN_MAC_MATCH_VALID)
-#define F_AOPEN_MAC_MATCH_VALID    V_AOPEN_MAC_MATCH_VALID(1U)
-
-#define S_AOPEN_IFF_VLAN    20
-#define M_AOPEN_IFF_VLAN    0xFFF
-#define V_AOPEN_IFF_VLAN(x) ((x) << S_AOPEN_IFF_VLAN)
-#define G_AOPEN_IFF_VLAN(x) (((x) >> S_AOPEN_IFF_VLAN) & M_AOPEN_IFF_VLAN)
+struct cpl_t5_act_open_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__be16 local_port;
+	__be16 peer_port;
+	__be32 local_ip;
+	__be32 peer_ip;
+	__be64 opt0;
+	__be32 rsvd;
+	__be32 opt2;
+	__be64 params;
+};
 
 struct cpl_act_open_req6 {
 	WR_HDR;
@@ -706,6 +704,21 @@
 	__be32 opt2;
 };
 
+struct cpl_t5_act_open_req6 {
+	WR_HDR;
+	union opcode_tid ot;
+	__be16 local_port;
+	__be16 peer_port;
+	__be64 local_ip_hi;
+	__be64 local_ip_lo;
+	__be64 peer_ip_hi;
+	__be64 peer_ip_lo;
+	__be64 opt0;
+	__be32 rsvd;
+	__be32 opt2;
+	__be64 params;
+};
+
 struct cpl_act_open_rpl {
 	RSS_HDR
 	union opcode_tid ot;
@@ -1051,6 +1064,10 @@
 #define V_TXPKT_LOOPBACK(x) ((x) << S_TXPKT_LOOPBACK)
 #define F_TXPKT_LOOPBACK    V_TXPKT_LOOPBACK(1U)
 
+#define S_TXPKT_TSTAMP    23
+#define V_TXPKT_TSTAMP(x) ((x) << S_TXPKT_TSTAMP)
+#define F_TXPKT_TSTAMP    V_TXPKT_TSTAMP(1U)
+
 #define S_TXPKT_OPCODE    24
 #define M_TXPKT_OPCODE    0xFF
 #define V_TXPKT_OPCODE(x) ((x) << S_TXPKT_OPCODE)
@@ -1113,7 +1130,7 @@
 #define V_TXPKT_L4CSUM_DIS(x) ((__u64)(x) << S_TXPKT_L4CSUM_DIS)
 #define F_TXPKT_L4CSUM_DIS    V_TXPKT_L4CSUM_DIS(1ULL)
 
-struct cpl_tx_pkt_lso {
+struct cpl_tx_pkt_lso_core {
 	__be32 lso_ctrl;
 	__be16 ipid_ofst;
 	__be16 mss;
@@ -1122,7 +1139,29 @@
 	/* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */
 };
 
-/* cpl_tx_pkt_lso.lso_ctrl fields */
+struct cpl_tx_pkt_lso {
+	WR_HDR;
+	struct cpl_tx_pkt_lso_core c;
+	/* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */
+};
+
+struct cpl_tx_pkt_ufo_core {
+	__be16 ethlen;
+	__be16 iplen;
+	__be16 udplen;
+	__be16 mss;
+	__be32 len;
+	__be32 r1;
+	/* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */
+};
+
+struct cpl_tx_pkt_ufo {
+	WR_HDR;
+	struct cpl_tx_pkt_ufo_core c;
+	/* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */
+};
+
+/* cpl_tx_pkt_lso_core.lso_ctrl fields */
 #define S_LSO_TCPHDR_LEN    0
 #define M_LSO_TCPHDR_LEN    0xF
 #define V_LSO_TCPHDR_LEN(x) ((x) << S_LSO_TCPHDR_LEN)
@@ -1159,7 +1198,7 @@
 #define V_LSO_OPCODE(x) ((x) << S_LSO_OPCODE)
 #define G_LSO_OPCODE(x) (((x) >> S_LSO_OPCODE) & M_LSO_OPCODE)
 
-/* cpl_tx_pkt_lso.mss fields */
+/* cpl_tx_pkt_lso_core.mss fields */
 #define S_LSO_MSS    0
 #define M_LSO_MSS    0x3FFF
 #define V_LSO_MSS(x) ((x) << S_LSO_MSS)
@@ -1169,31 +1208,29 @@
 #define V_LSO_IPID_SPLIT(x) ((x) << S_LSO_IPID_SPLIT)
 #define F_LSO_IPID_SPLIT    V_LSO_IPID_SPLIT(1U)
 
-struct cpl_tx_pkt_coalesce {
-	__be32 cntrl;
+struct cpl_tx_pkt_fso {
+	WR_HDR;
+	__be32 fso_ctrl;
+	__be16 seqcnt_ofst;
+	__be16 mtu;
+	__be32 param_offset;
 	__be32 len;
-	__be64 addr;
+	/* encapsulated CPL (TX_PKT or TX_PKT_XT) follows here */
 };
 
-struct tx_pkt_coalesce_wr {
-	WR_HDR;
-#if !(defined C99_NOT_SUPPORTED)
-	struct cpl_tx_pkt_coalesce cpl[0];
-#endif
-};
+/* cpl_tx_pkt_fso.fso_ctrl fields different from cpl_tx_pkt_lso.lso_ctrl */
+#define S_FSO_XCHG_CLASS    21
+#define V_FSO_XCHG_CLASS(x) ((x) << S_FSO_XCHG_CLASS)
+#define F_FSO_XCHG_CLASS    V_FSO_XCHG_CLASS(1U)
 
-struct mngt_pktsched_wr {
-	__be32 wr_hi;
-	__be32 wr_lo;
-	__u8  mngt_opcode;
-	__u8  rsvd[7];
-	__u8  sched;
-	__u8  idx;
-	__u8  min;
-	__u8  max;
-	__u8  binding;
-	__u8  rsvd1[3];
-};
+#define S_FSO_INITIATOR    20
+#define V_FSO_INITIATOR(x) ((x) << S_FSO_INITIATOR)
+#define F_FSO_INITIATOR    V_FSO_INITIATOR(1U)
+
+#define S_FSO_FCHDR_LEN    12
+#define M_FSO_FCHDR_LEN    0xF
+#define V_FSO_FCHDR_LEN(x) ((x) << S_FSO_FCHDR_LEN)
+#define G_FSO_FCHDR_LEN(x) (((x) >> S_FSO_FCHDR_LEN) & M_FSO_FCHDR_LEN)
 
 struct cpl_iscsi_hdr_no_rss {
 	union opcode_tid ot;
@@ -1205,6 +1242,40 @@
 	__u8 status;
 };
 
+struct cpl_tx_data_iso {
+	WR_HDR;
+	__be32 iso_ctrl;
+	__u8   rsvd;
+	__u8   ahs_len;
+	__be16 mss;
+	__be32 burst_size;
+	__be32 len;
+	/* encapsulated CPL_TX_DATA follows here */
+};
+
+/* cpl_tx_data_iso.iso_ctrl fields different from cpl_tx_pkt_lso.lso_ctrl */
+#define S_ISO_CPLHDR_LEN    18
+#define M_ISO_CPLHDR_LEN    0xF
+#define V_ISO_CPLHDR_LEN(x) ((x) << S_ISO_CPLHDR_LEN)
+#define G_ISO_CPLHDR_LEN(x) (((x) >> S_ISO_CPLHDR_LEN) & M_ISO_CPLHDR_LEN)
+
+#define S_ISO_HDR_CRC    17
+#define V_ISO_HDR_CRC(x) ((x) << S_ISO_HDR_CRC)
+#define F_ISO_HDR_CRC    V_ISO_HDR_CRC(1U)
+
+#define S_ISO_DATA_CRC    16
+#define V_ISO_DATA_CRC(x) ((x) << S_ISO_DATA_CRC)
+#define F_ISO_DATA_CRC    V_ISO_DATA_CRC(1U)
+
+#define S_ISO_IMD_DATA_EN    15
+#define V_ISO_IMD_DATA_EN(x) ((x) << S_ISO_IMD_DATA_EN)
+#define F_ISO_IMD_DATA_EN    V_ISO_IMD_DATA_EN(1U)
+
+#define S_ISO_PDU_TYPE    13
+#define M_ISO_PDU_TYPE    0x3
+#define V_ISO_PDU_TYPE(x) ((x) << S_ISO_PDU_TYPE)
+#define G_ISO_PDU_TYPE(x) (((x) >> S_ISO_PDU_TYPE) & M_ISO_PDU_TYPE)
+
 struct cpl_iscsi_hdr {
 	RSS_HDR
 	union opcode_tid ot;
@@ -1226,6 +1297,17 @@
 #define V_ISCSI_DDP(x) ((x) << S_ISCSI_DDP)
 #define F_ISCSI_DDP    V_ISCSI_DDP(1U)
 
+struct cpl_iscsi_data {
+	RSS_HDR
+	union opcode_tid ot;
+	__u8 rsvd0[2];
+	__be16 len;
+	__be32 seq;
+	__be16 urg;
+	__u8 rsvd1;
+	__u8 status;
+};
+
 struct cpl_rx_data {
 	RSS_HDR
 	union opcode_tid ot;
@@ -1265,6 +1347,16 @@
 	__be32 param;
 };
 
+struct cpl_fcoe_data {
+	RSS_HDR
+	union opcode_tid ot;
+	__u8 rsvd0[2];
+	__be16 len;
+	__be32 seq;
+	__u8 rsvd1[3];
+	__u8 status;
+};
+
 struct cpl_rx_urg_notify {
 	RSS_HDR
 	union opcode_tid ot;
@@ -1333,6 +1425,8 @@
 	__be32 ddpvld;
 };
 
+#define cpl_rx_iscsi_ddp cpl_rx_data_ddp
+
 struct cpl_rx_fcoe_ddp {
 	RSS_HDR
 	union opcode_tid ot;
@@ -1344,7 +1438,49 @@
 	__be32 ddpvld;
 };
 
-/* cpl_rx_{data,fcoe}_ddp.ddpvld fields */
+struct cpl_rx_data_dif {
+	RSS_HDR
+	union opcode_tid ot;
+	__be16 ddp_len;
+	__be16 msg_len;
+	__be32 seq;
+	union {
+		__be32 nxt_seq;
+		__be32 ddp_report;
+	} u;
+	__be32 err_vec;
+	__be32 ddpvld;
+};
+
+struct cpl_rx_iscsi_dif {
+	RSS_HDR
+	union opcode_tid ot;
+	__be16 ddp_len;
+	__be16 msg_len;
+	__be32 seq;
+	union {
+		__be32 nxt_seq;
+		__be32 ddp_report;
+	} u;
+	__be32 ulp_crc;
+	__be32 ddpvld;
+	__u8 rsvd0[8];
+	__be32 err_vec;
+	__u8 rsvd1[4];
+};
+
+struct cpl_rx_fcoe_dif {
+	RSS_HDR
+	union opcode_tid ot;
+	__be16 ddp_len;
+	__be16 msg_len;
+	__be32 seq;
+	__be32 ddp_report;
+	__be32 err_vec;
+	__be32 ddpvld;
+};
+
+/* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddpvld fields */
 #define S_DDP_VALID    15
 #define M_DDP_VALID    0x1FFFF
 #define V_DDP_VALID(x) ((x) << S_DDP_VALID)
@@ -1407,7 +1543,7 @@
 #define V_DDP_ULP_MODE(x) ((x) << S_DDP_ULP_MODE)
 #define G_DDP_ULP_MODE(x) (((x) >> S_DDP_ULP_MODE) & M_DDP_ULP_MODE)
 
-/* cpl_rx_{data,fcoe}_ddp.ddp_report fields */
+/* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddp_report fields */
 #define S_DDP_OFFSET    0
 #define M_DDP_OFFSET    0xFFFFFF
 #define V_DDP_OFFSET(x) ((x) << S_DDP_OFFSET)
@@ -1472,16 +1608,31 @@
 #define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN)
 #define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN)
 
+#define S_RX_T5_ETHHDR_LEN    0
+#define M_RX_T5_ETHHDR_LEN    0x3F
+#define V_RX_T5_ETHHDR_LEN(x) ((x) << S_RX_T5_ETHHDR_LEN)
+#define G_RX_T5_ETHHDR_LEN(x) (((x) >> S_RX_T5_ETHHDR_LEN) & M_RX_T5_ETHHDR_LEN)
+
 #define S_RX_PKTYPE    5
 #define M_RX_PKTYPE    0x7
 #define V_RX_PKTYPE(x) ((x) << S_RX_PKTYPE)
 #define G_RX_PKTYPE(x) (((x) >> S_RX_PKTYPE) & M_RX_PKTYPE)
 
+#define S_RX_T5_DATYPE    6
+#define M_RX_T5_DATYPE    0x3
+#define V_RX_T5_DATYPE(x) ((x) << S_RX_T5_DATYPE)
+#define G_RX_T5_DATYPE(x) (((x) >> S_RX_T5_DATYPE) & M_RX_T5_DATYPE)
+
 #define S_RX_MACIDX    8
 #define M_RX_MACIDX    0x1FF
 #define V_RX_MACIDX(x) ((x) << S_RX_MACIDX)
 #define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX)
 
+#define S_RX_T5_PKTYPE    17
+#define M_RX_T5_PKTYPE    0x7
+#define V_RX_T5_PKTYPE(x) ((x) << S_RX_T5_PKTYPE)
+#define G_RX_T5_PKTYPE(x) (((x) >> S_RX_T5_PKTYPE) & M_RX_T5_PKTYPE)
+
 #define S_RX_DATYPE    18
 #define M_RX_DATYPE    0x3
 #define V_RX_DATYPE(x) ((x) << S_RX_DATYPE)
@@ -1614,6 +1765,29 @@
 	__be64 tstamp;
 };
 
+struct cpl_t5_trace_pkt {
+	RSS_HDR
+	__u8 opcode;
+	__u8 intf;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 runt:4;
+	__u8 filter_hit:4;
+	__u8 :6;
+	__u8 err:1;
+	__u8 trunc:1;
+#else
+	__u8 filter_hit:4;
+	__u8 runt:4;
+	__u8 trunc:1;
+	__u8 err:1;
+	__u8 :6;
+#endif
+	__be16 rsvd;
+	__be16 len;
+	__be64 tstamp;
+	__be64 rsvd1;
+};
+
 struct cpl_rte_delete_req {
 	WR_HDR;
 	union opcode_tid ot;
@@ -1764,6 +1938,31 @@
 	__u8   src_mac0[6];
 };
 
+struct cpl_smt_write_rpl {
+	RSS_HDR
+	union opcode_tid ot;
+	__u8 status;
+	__u8 rsvd[3];
+};
+
+struct cpl_smt_read_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__be32 params;
+};
+
+struct cpl_smt_read_rpl {
+	RSS_HDR
+	union opcode_tid ot;
+	__u8   status;
+	__u8   ovlan_idx;
+	__be16 rsvd;
+	__be16 pfvf1;
+	__u8   src_mac1[6];
+	__be16 pfvf0;
+	__u8   src_mac0[6];
+};
+
 /* cpl_smt_{read,write}_req.params fields */
 #define S_SMTW_OVLAN_IDX    16
 #define M_SMTW_OVLAN_IDX    0xF
@@ -1794,31 +1993,65 @@
 #define V_SMTW_VF_VLD(x) ((x) << S_SMTW_VF_VLD)
 #define F_SMTW_VF_VLD    V_SMTW_VF_VLD(1U)
 
-struct cpl_smt_write_rpl {
+struct cpl_tag_write_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__be32 params;
+	__be64 tag_val;
+};
+
+struct cpl_tag_write_rpl {
 	RSS_HDR
 	union opcode_tid ot;
 	__u8 status;
-	__u8 rsvd[3];
+	__u8 rsvd[2];
+	__u8 idx;
 };
 
-struct cpl_smt_read_req {
+struct cpl_tag_read_req {
 	WR_HDR;
 	union opcode_tid ot;
 	__be32 params;
 };
 
-struct cpl_smt_read_rpl {
+struct cpl_tag_read_rpl {
 	RSS_HDR
 	union opcode_tid ot;
 	__u8   status;
-	__u8   ovlan_idx;
-	__be16 rsvd;
-	__be16 pfvf1;
-	__u8   src_mac1[6];
-	__be16 pfvf0;
-	__u8   src_mac0[6];
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 :4;
+	__u8 tag_len:1;
+	__u8 :2;
+	__u8 ins_enable:1;
+#else
+	__u8 ins_enable:1;
+	__u8 :2;
+	__u8 tag_len:1;
+	__u8 :4;
+#endif
+	__u8   rsvd;
+	__u8   tag_idx;
+	__be64 tag_val;
 };
 
+/* cpl_tag{read,write}_req.params fields */
+#define S_TAGW_IDX    0
+#define M_TAGW_IDX    0x7F
+#define V_TAGW_IDX(x) ((x) << S_TAGW_IDX)
+#define G_TAGW_IDX(x) (((x) >> S_TAGW_IDX) & M_TAGW_IDX)
+
+#define S_TAGW_LEN    20
+#define V_TAGW_LEN(x) ((x) << S_TAGW_LEN)
+#define F_TAGW_LEN    V_TAGW_LEN(1U)
+
+#define S_TAGW_INS_ENABLE    23
+#define V_TAGW_INS_ENABLE(x) ((x) << S_TAGW_INS_ENABLE)
+#define F_TAGW_INS_ENABLE    V_TAGW_INS_ENABLE(1U)
+
+#define S_TAGW_NORPL    31
+#define V_TAGW_NORPL(x) ((x) << S_TAGW_NORPL)
+#define F_TAGW_NORPL    V_TAGW_NORPL(1U)
+
 struct cpl_barrier {
 	WR_HDR;
 	__u8 opcode;
@@ -1882,6 +2115,16 @@
 #define V_NTFY_ETHHDR_LEN(x) ((x) << S_NTFY_ETHHDR_LEN)
 #define G_NTFY_ETHHDR_LEN(x) (((x) >> S_NTFY_ETHHDR_LEN) & M_NTFY_ETHHDR_LEN)
 
+#define S_NTFY_T5_IPHDR_LEN    18
+#define M_NTFY_T5_IPHDR_LEN    0xFF
+#define V_NTFY_T5_IPHDR_LEN(x) ((x) << S_NTFY_T5_IPHDR_LEN)
+#define G_NTFY_T5_IPHDR_LEN(x) (((x) >> S_NTFY_T5_IPHDR_LEN) & M_NTFY_T5_IPHDR_LEN)
+
+#define S_NTFY_T5_ETHHDR_LEN    26
+#define M_NTFY_T5_ETHHDR_LEN    0x3F
+#define V_NTFY_T5_ETHHDR_LEN(x) ((x) << S_NTFY_T5_ETHHDR_LEN)
+#define G_NTFY_T5_ETHHDR_LEN(x) (((x) >> S_NTFY_T5_ETHHDR_LEN) & M_NTFY_T5_ETHHDR_LEN)
+
 struct cpl_rdma_terminate {
 	RSS_HDR
 	union opcode_tid ot;
@@ -2011,12 +2254,18 @@
 	union opcode_tid ot;
 	u8 credits;
 	u8 rsvd0[2];
-	u8 seq_vld;
+	u8 flags;
 	__be32 snd_nxt;
 	__be32 snd_una;
 	__be64 rsvd1;
 };
 
+enum {
+	CPL_FW4_ACK_FLAGS_SEQVAL	= 0x1,	/* seqn valid */
+	CPL_FW4_ACK_FLAGS_CH		= 0x2,	/* channel change complete */
+	CPL_FW4_ACK_FLAGS_FLOWC		= 0x4,	/* fw_flowc_wr complete */
+};
+
 struct cpl_fw6_msg {
 	RSS_HDR
 	u8 opcode;
@@ -2029,6 +2278,17 @@
 /* cpl_fw6_msg.type values */
 enum {
 	FW6_TYPE_CMD_RPL = 0,
+	FW6_TYPE_WR_RPL = 1,
+	FW6_TYPE_CQE = 2,
+	FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3,
+};
+
+struct cpl_fw6_msg_ofld_connection_wr_rpl {
+	__u64	cookie;
+	__be32	tid;	/* or atid in case of active failure */
+	__u8	t_state;
+	__u8	retval;
+	__u8	rsvd[2];
 };
 
 /* ULP_TX opcodes */
@@ -2135,4 +2395,8 @@
 #define M_ULP_TXPKT_FID     0x7ff
 #define V_ULP_TXPKT_FID(x)  ((x) << S_ULP_TXPKT_FID)
 
+#define S_ULP_TXPKT_RO      3
+#define V_ULP_TXPKT_RO(x) ((x) << S_ULP_TXPKT_RO)
+#define F_ULP_TXPKT_RO V_ULP_TXPKT_RO(1U)
+
 #endif  /* T4_MSG_H */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/firmware/t4fw_cfg.txt
--- a/head/sys/dev/cxgbe/firmware/t4fw_cfg.txt	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/firmware/t4fw_cfg.txt	Wed Jul 25 17:04:43 2012 +0300
@@ -1,8 +1,6 @@
 # Firmware configuration file.
 #
 # Global limits (some are hardware limits, others are due to the firmware).
-# Also note that the firmware reserves some of these resources for its own use
-# so it's not always possible for the drivers to grab everything listed here.
 # nvi = 128		virtual interfaces
 # niqflint = 1023	ingress queues with freelists and/or interrupts
 # nethctrl = 64K	Ethernet or ctrl egress queues
@@ -58,11 +56,11 @@
 [function "4"]
 	wx_caps = all
 	r_caps = all
-	nvi = 48
+	nvi = 54
 	niqflint = 256
 	nethctrl = 128
 	neq = 256
-	nexactf = 300
+	nexactf = 328
 	cmask = all
 	pmask = all
 
@@ -76,7 +74,7 @@
 	# TCAM iff there is room left (that is, the rest don't add up to 2048).
 	nroute = 32
 	nclip = 0	# needed only for IPv6 offload
-	nfilter = 1504
+	nfilter = 1488
 	nserver = 512
 	nhash = 16384
 
@@ -90,6 +88,18 @@
 [function "6"]
 	nvi = 1
 
+# The following function, 1023, is not an actual PCIE function but is used to
+# configure and reserve firmware internal resources that come from the global
+# resource pool.
+[function "1023"]
+	wx_caps = all
+	r_caps = all
+	nvi = 4
+	cmask = all
+	pmask = all
+	nexactf = 8
+	nfilter = 16
+
 # MPS has 192K buffer space for ingress packets from the wire as well as
 # loopback path of the L2 switch.
 [port "0"]
@@ -126,7 +136,7 @@
 
 [fini]
 	version = 0x1
-	checksum = 0xb31cdfac
+	checksum = 0xdb5813f9
 #
-# $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_cfg.txt 228561 2011-12-16 02:09:51Z np $
+# $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_cfg.txt 237436 2012-06-22 07:51:15Z np $
 #
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt
--- a/head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt	Wed Jul 25 17:04:43 2012 +0300
@@ -1,7 +1,10 @@
 # Chelsio T4 Factory Default configuration file.
 #
-# Copyright (C) 2010 Chelsio Communications.  All rights reserved.
+# Copyright (C) 2010-2012 Chelsio Communications.  All rights reserved.
 #
+#   DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES.  MODIFICATION OF
+#   THIS FILE WILL RESULT IN A NON-FUNCTIONAL T4 ADAPTER AND MAY RESULT
+#   IN PHYSICAL DAMAGE TO T4 ADAPTERS.
 
 # This file provides the default, power-on configuration for 4-port T4-based
 # adapters shipped from the factory.  These defaults are designed to address
@@ -112,7 +115,7 @@
 	# must sum to <= 36): { IP Fragment (1), MPS Match Type (3),
 	# IP Protocol (8), [Inner] VLAN (17), Port (3), FCoE (1) }
 	#
-	filterMode = fragmentation, mpshittype, protocol, vnic_id, port, fcoe
+	filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
 
 	# Percentage of dynamic memory (in either the EDRAM or external MEM)
 	# to use for TP RX payload
@@ -339,18 +342,19 @@
 	r_caps = all		# read permissions for all commands
 	nvi = 28		# NVI_UNIFIED
 	niqflint = 170		# NFLIQ_UNIFIED + NLFIQ_WD
-	nethctrl = 96 		# NETHCTRL_UNIFIED + NETHCTRL_WD
-	neq = 252		# NEQ_UNIFIED + NEQ_WD
+	nethctrl = 100		# NETHCTRL_UNIFIED + NETHCTRL_WD
+	neq = 256		# NEQ_UNIFIED + NEQ_WD
 	nexactf = 40		# NMPSTCAM_UNIFIED
 	cmask = all		# access to all channels
 	pmask = all		# access to all four ports ...
+	nethofld = 1024		# number of user mode ethernet flow contexts
 	nroute = 32		# number of routing region entries
 	nclip = 32		# number of clip region entries
-	nfilter = 768		# number of filter region entries
-	nserver = 256		# number of server region entries
-	nhash = 0		# number of hash region entries
+	nfilter = 496		# number of filter region entries
+	nserver = 496		# number of server region entries
+	nhash = 12288		# number of hash region entries
 	protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu
-	tp_l2t = 100
+	tp_l2t = 3072
 	tp_ddp = 2
 	tp_ddp_iscsi = 2
 	tp_stag = 2
@@ -371,6 +375,15 @@
 	nexactf = 4		# NPORTS
 	cmask = all		# access to all channels
 	pmask = all		# access to all four ports ...
+	nserver = 16
+	nhash = 2048
+	tp_l2t = 1024
+	protocol = iscsi_initiator_fofld
+	tp_ddp_iscsi = 2
+	iscsi_ntask = 2048
+	iscsi_nsess = 2048
+	iscsi_nconn_per_session = 1
+	iscsi_ninitiator_instance = 64
 
 [function "6"]
 	wx_caps = all		# write/execute permissions for all commands
@@ -384,13 +397,26 @@
 				# and we never load PF0..3 and PF4 concurrently
 	cmask = all		# access to all channels
 	pmask = all		# access to all four ports ...
-	nhash = 0 
+	nhash = 2048
 	protocol = fcoe_initiator
 	tp_ddp = 2
 	fcoe_nfcf = 16
 	fcoe_nvnp = 32
 	fcoe_nssn = 1024
 
+# The following function, 1023, is not an actual PCIE function but is used to
+# configure and reserve firmware internal resources that come from the global
+# resource pool.
+#
+[function "1023"]
+	wx_caps = all		# write/execute permissions for all commands
+	r_caps = all		# read permissions for all commands
+	nvi = 4			# NVI_UNIFIED
+	cmask = all		# access to all channels
+	pmask = all		# access to all four ports ...
+	nexactf = 8		# NPORTS + DCBX +
+	nfilter = 16		# number of filter region entries
+
 # For Virtual functions, we only allow NIC functionality and we only allow
 # access to one port (1 << PF).  Note that because of limitations in the
 # Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL
@@ -488,8 +514,8 @@
 	dwm = 30
 
 [fini]
-	version = 0x14250007
-	checksum = 0xfcbadefb
+	version = 0x1425000b
+	checksum = 0x7690f7a5
 
 # Total resources used by above allocations:
 #   Virtual Interfaces: 104
@@ -499,5 +525,5 @@
 #   MSI-X Vectors: 736
 #   Virtual Functions: 64
 #
-# $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt 228561 2011-12-16 02:09:51Z np $
+# $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt 237436 2012-06-22 07:51:15Z np $
 #
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/firmware/t4fw_interface.h
--- a/head/sys/dev/cxgbe/firmware/t4fw_interface.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/firmware/t4fw_interface.h	Wed Jul 25 17:04:43 2012 +0300
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2011 Chelsio Communications, Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_interface.h 228561 2011-12-16 02:09:51Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/firmware/t4fw_interface.h 237436 2012-06-22 07:51:15Z np $
  *
  */
 
@@ -83,7 +83,9 @@
 	FW_TP_WR		= 0x05,
 	FW_ETH_TX_PKT_WR	= 0x08,
 	FW_ETH_TX_PKTS_WR	= 0x09,
+	FW_ETH_TX_UO_WR		= 0x1c,
 	FW_EQ_FLUSH_WR		= 0x1b,
+	FW_OFLD_CONNECTION_WR	= 0x2f,
 	FW_FLOWC_WR		= 0x0a,
 	FW_OFLD_TX_DATA_WR	= 0x0b,
 	FW_CMD_WR		= 0x10,
@@ -96,9 +98,23 @@
 	FW_RI_BIND_MW_WR	= 0x18,
 	FW_RI_FR_NSMR_WR	= 0x19,
 	FW_RI_INV_LSTAG_WR	= 0x1a,
+	FW_RI_SEND_IMMEDIATE_WR	= 0x15,
+	FW_RI_ATOMIC_WR		= 0x16,
 	FW_RI_WR		= 0x0d,
-	FW_ISCSI_NODE_WR	= 0x4a,
-	FW_LASTC2E_WR		= 0x50
+	FW_CHNET_IFCONF_WR	= 0x6b,
+	FW_RDEV_WR		= 0x38,
+	FW_FOISCSI_NODE_WR	= 0x60,
+	FW_FOISCSI_CTRL_WR	= 0x6a,
+	FW_FOISCSI_CHAP_WR	= 0x6c,
+	FW_FCOE_ELS_CT_WR	= 0x30,
+	FW_SCSI_WRITE_WR	= 0x31,
+	FW_SCSI_READ_WR		= 0x32,
+	FW_SCSI_CMD_WR		= 0x33,
+	FW_SCSI_ABRT_CLS_WR	= 0x34,
+	FW_SCSI_TGT_ACC_WR	= 0x35,
+	FW_SCSI_TGT_XMIT_WR	= 0x36,
+	FW_SCSI_TGT_RSP_WR	= 0x37,
+	FW_LASTC2E_WR		= 0x70
 };
 
 /*
@@ -536,6 +552,18 @@
 	__u8   type;
 };
 
+struct fw_eth_tx_uo_wr {
+	__be32 op_immdlen;
+	__be32 equiq_to_len16;
+	__be64 r3;
+	__be16 ethlen;
+	__be16 iplen;
+	__be16 udplen;
+	__be16 mss;
+	__be32 length;
+	__be32 r4;
+};
+
 struct fw_eq_flush_wr {
 	__u8   opcode;
 	__u8   r1[3];
@@ -543,6 +571,142 @@
 	__be64 r3;
 };
 
+struct fw_ofld_connection_wr {
+	__be32 op_compl;
+	__be32 len16_pkd;
+	__u64  cookie;
+	__be64 r2;
+	__be64 r3;
+	struct fw_ofld_connection_le {
+		__be32 version_cpl;
+		__be32 filter;
+		__be32 r1;
+		__be16 lport;
+		__be16 pport;
+		union fw_ofld_connection_leip {
+			struct fw_ofld_connection_le_ipv4 {
+				__be32 pip;
+				__be32 lip;
+				__be64 r0;
+				__be64 r1;
+				__be64 r2;
+			} ipv4;
+			struct fw_ofld_connection_le_ipv6 {
+				__be64 pip_hi;
+				__be64 pip_lo;
+				__be64 lip_hi;
+				__be64 lip_lo;
+			} ipv6;
+		} u;
+	} le;
+	struct fw_ofld_connection_tcb {
+		__be32 t_state_to_astid;
+		__be16 cplrxdataack_cplpassacceptrpl;
+		__be16 rcv_adv;
+		__be32 rcv_nxt;
+		__be32 tx_max;
+		__be64 opt0;
+		__be32 opt2;
+		__be32 r1;
+		__be64 r2;
+		__be64 r3;
+	} tcb;
+};
+
+#define S_FW_OFLD_CONNECTION_WR_VERSION		31
+#define M_FW_OFLD_CONNECTION_WR_VERSION		0x1
+#define V_FW_OFLD_CONNECTION_WR_VERSION(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_VERSION)
+#define G_FW_OFLD_CONNECTION_WR_VERSION(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_VERSION) & \
+     M_FW_OFLD_CONNECTION_WR_VERSION)
+#define F_FW_OFLD_CONNECTION_WR_VERSION	V_FW_OFLD_CONNECTION_WR_VERSION(1U)
+
+#define S_FW_OFLD_CONNECTION_WR_CPL	30
+#define M_FW_OFLD_CONNECTION_WR_CPL	0x1
+#define V_FW_OFLD_CONNECTION_WR_CPL(x)	((x) << S_FW_OFLD_CONNECTION_WR_CPL)
+#define G_FW_OFLD_CONNECTION_WR_CPL(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_CPL) & M_FW_OFLD_CONNECTION_WR_CPL)
+#define F_FW_OFLD_CONNECTION_WR_CPL	V_FW_OFLD_CONNECTION_WR_CPL(1U)
+
+#define S_FW_OFLD_CONNECTION_WR_T_STATE		28
+#define M_FW_OFLD_CONNECTION_WR_T_STATE		0xf
+#define V_FW_OFLD_CONNECTION_WR_T_STATE(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_T_STATE)
+#define G_FW_OFLD_CONNECTION_WR_T_STATE(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_T_STATE) & \
+     M_FW_OFLD_CONNECTION_WR_T_STATE)
+
+#define S_FW_OFLD_CONNECTION_WR_RCV_SCALE	24
+#define M_FW_OFLD_CONNECTION_WR_RCV_SCALE	0xf
+#define V_FW_OFLD_CONNECTION_WR_RCV_SCALE(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_RCV_SCALE)
+#define G_FW_OFLD_CONNECTION_WR_RCV_SCALE(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_RCV_SCALE) & \
+     M_FW_OFLD_CONNECTION_WR_RCV_SCALE)
+
+#define S_FW_OFLD_CONNECTION_WR_ASTID		0
+#define M_FW_OFLD_CONNECTION_WR_ASTID		0xffffff
+#define V_FW_OFLD_CONNECTION_WR_ASTID(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_ASTID)
+#define G_FW_OFLD_CONNECTION_WR_ASTID(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_ASTID) & M_FW_OFLD_CONNECTION_WR_ASTID)
+
+#define S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK	15
+#define M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK	0x1
+#define V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK)
+#define G_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) & \
+     M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK)
+#define F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK	\
+    V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(1U)
+
+#define S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL	14
+#define M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL	0x1
+#define V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x)	\
+    ((x) << S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL)
+#define G_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x)	\
+    (((x) >> S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) & \
+     M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL)
+#define F_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL	\
+    V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(1U)
+
+enum fw_flowc_mnem_tcpstate {
+	FW_FLOWC_MNEM_TCPSTATE_CLOSED	= 0, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_LISTEN	= 1, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_SYNSENT	= 2, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_SYNRECEIVED = 3, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED = 4, /* default */
+	FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT = 5, /* got peer close already */
+	FW_FLOWC_MNEM_TCPSTATE_FINWAIT1	= 6, /* haven't gotten ACK for FIN and
+					      * will resend FIN - equiv ESTAB
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_CLOSING	= 7, /* haven't gotten ACK for FIN and
+					      * will resend FIN but have
+					      * received FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_LASTACK	= 8, /* haven't gotten ACK for FIN and
+					      * will resend FIN but have
+					      * received FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_FINWAIT2	= 9, /* sent FIN and got FIN + ACK,
+					      * waiting for FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_TIMEWAIT	= 10, /* not expected */
+};
+
+enum fw_flowc_mnem_uostate {
+	FW_FLOWC_MNEM_UOSTATE_CLOSED	= 0, /* illegal */
+	FW_FLOWC_MNEM_UOSTATE_ESTABLISHED = 1, /* default */
+	FW_FLOWC_MNEM_UOSTATE_CLOSING	= 2, /* graceful close, after sending
+					      * outstanding payload
+					      */
+	FW_FLOWC_MNEM_UOSTATE_ABORTING	= 3, /* immediate close, after
+					      * discarding outstanding payload
+					      */
+};
+
 enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_PFNVFN,		/* PFN [15:8] VFN [7:0] */
 	FW_FLOWC_MNEM_CH,
@@ -553,6 +717,9 @@
 	FW_FLOWC_MNEM_SNDBUF,
 	FW_FLOWC_MNEM_MSS,
 	FW_FLOWC_MNEM_TXDATAPLEN_MAX,
+	FW_FLOWC_MNEM_TCPSTATE,
+	FW_FLOWC_MNEM_UOSTATE,
+	FW_FLOWC_MNEM_SCHEDCLASS,
 };
 
 struct fw_flowc_mnemval {
@@ -672,7 +839,7 @@
  **************************************/
 
 enum fw_ri_wr_opcode {
-	FW_RI_RDMA_WRITE		= 0x0,               /* IETF RDMAP v1.0 ... */
+	FW_RI_RDMA_WRITE		= 0x0,	/* IETF RDMAP v1.0 ... */
 	FW_RI_READ_REQ			= 0x1,
 	FW_RI_READ_RESP			= 0x2,
 	FW_RI_SEND			= 0x3,
@@ -680,14 +847,23 @@
 	FW_RI_SEND_WITH_SE		= 0x5,
 	FW_RI_SEND_WITH_SE_INV		= 0x6,
 	FW_RI_TERMINATE			= 0x7,
-	FW_RI_RDMA_INIT			= 0x8,                /* CHELSIO RI specific ... */
+	FW_RI_RDMA_INIT			= 0x8,	/* CHELSIO RI specific ... */
 	FW_RI_BIND_MW			= 0x9,
 	FW_RI_FAST_REGISTER		= 0xa,
 	FW_RI_LOCAL_INV			= 0xb,
 	FW_RI_QP_MODIFY			= 0xc,
 	FW_RI_BYPASS			= 0xd,
 	FW_RI_RECEIVE			= 0xe,
-
+#if 0
+	FW_RI_SEND_IMMEDIATE		= 0x8,
+	FW_RI_SEND_IMMEDIATE_WITH_SE	= 0x9,
+	FW_RI_ATOMIC_REQUEST		= 0xa,
+	FW_RI_ATOMIC_RESPONSE		= 0xb,
+
+	FW_RI_BIND_MW			= 0xc, /* CHELSIO RI specific ... */
+	FW_RI_FAST_REGISTER		= 0xd,
+	FW_RI_LOCAL_INV			= 0xe,
+#endif
 	FW_RI_SGE_EC_CR_RETURN		= 0xf
 };
 
@@ -1403,6 +1579,72 @@
 	__be32 stag_inv;
 };
 
+struct fw_ri_send_immediate_wr {
+	__u8   opcode;
+	__u8   flags;
+	__u16  wrid;
+	__u8   r1[3];
+	__u8   len16;
+	__be32 sendimmop_pkd;
+	__be32 r3;
+	__be32 plen;
+	__be32 r4;
+	__be64 r5;
+#ifndef C99_NOT_SUPPORTED
+	struct fw_ri_immd immd_src[0];
+#endif
+};
+
+#define S_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP	0
+#define M_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP	0xf
+#define V_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP(x)	\
+    ((x) << S_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP)
+#define G_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP(x)	\
+    (((x) >> S_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP) & \
+     M_FW_RI_SEND_IMMEDIATE_WR_SENDIMMOP)
+
+enum fw_ri_atomic_op {
+	FW_RI_ATOMIC_OP_FETCHADD,
+	FW_RI_ATOMIC_OP_SWAP,
+	FW_RI_ATOMIC_OP_CMDSWAP,
+};
+
+struct fw_ri_atomic_wr {
+	__u8   opcode;
+	__u8   flags;
+	__u16  wrid;
+	__u8   r1[3];
+	__u8   len16;
+	__be32 atomicop_pkd;
+	__be64 r3;
+	__be32 aopcode_pkd;
+	__be32 reqid;
+	__be32 stag;
+	__be32 to_hi;
+	__be32 to_lo;
+	__be32 addswap_data_hi;
+	__be32 addswap_data_lo;
+	__be32 addswap_mask_hi;
+	__be32 addswap_mask_lo;
+	__be32 compare_data_hi;
+	__be32 compare_data_lo;
+	__be32 compare_mask_hi;
+	__be32 compare_mask_lo;
+	__be32 r5;
+};
+
+#define S_FW_RI_ATOMIC_WR_ATOMICOP	0
+#define M_FW_RI_ATOMIC_WR_ATOMICOP	0xf
+#define V_FW_RI_ATOMIC_WR_ATOMICOP(x)	((x) << S_FW_RI_ATOMIC_WR_ATOMICOP)
+#define G_FW_RI_ATOMIC_WR_ATOMICOP(x)	\
+    (((x) >> S_FW_RI_ATOMIC_WR_ATOMICOP) & M_FW_RI_ATOMIC_WR_ATOMICOP)
+
+#define S_FW_RI_ATOMIC_WR_AOPCODE	0
+#define M_FW_RI_ATOMIC_WR_AOPCODE	0xf
+#define V_FW_RI_ATOMIC_WR_AOPCODE(x)	((x) << S_FW_RI_ATOMIC_WR_AOPCODE)
+#define G_FW_RI_ATOMIC_WR_AOPCODE(x)	\
+    (((x) >> S_FW_RI_ATOMIC_WR_AOPCODE) & M_FW_RI_ATOMIC_WR_AOPCODE)
+
 enum fw_ri_type {
 	FW_RI_TYPE_INIT,
 	FW_RI_TYPE_FINI,
@@ -1485,273 +1727,1057 @@
     (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE)
 
 /******************************************************************************
- *   S C S I   W O R K   R E Q U E S T s
- **********************************************/
-
-
-/******************************************************************************
- *   F O i S C S I   W O R K   R E Q U E S T s
- **********************************************/
-
-#define	ISCSI_NAME_MAX_LEN	224
-#define	ISCSI_ALIAS_MAX_LEN	224
-
-enum session_type {
-	ISCSI_SESSION_DISCOVERY = 0,
-	ISCSI_SESSION_NORMAL,
-};
-
-enum digest_val {
-	DIGEST_NONE = 0,
-	DIGEST_CRC32,
-	DIGEST_BOTH,
-};
-
-enum fw_iscsi_subops {
-	NODE_ONLINE = 1,
-	SESS_ONLINE,
-	CONN_ONLINE,
-	NODE_OFFLINE,
-	SESS_OFFLINE,
-	CONN_OFFLINE,
-	NODE_STATS,
-	SESS_STATS,
-	CONN_STATS,
-	UPDATE_IOHANDLE,
-};
-
-struct fw_iscsi_node_attr {
-	__u8		name_len;
-	__u8		node_name[ISCSI_NAME_MAX_LEN];
-	__u8		alias_len;
-	__u8		node_alias[ISCSI_ALIAS_MAX_LEN];
-};
-
-struct fw_iscsi_sess_attr {
-	__u8		sess_type;
-	__u8		seq_inorder;
-	__u8		pdu_inorder;
-	__u8		immd_data_en;
-	__u8		init_r2t_en;
-	__u8		erl;
-	__be16		max_conn;
-	__be16		max_r2t;
-	__be16		time2wait;
-	__be16		time2retain;
-	__be32		max_burst;
-	__be32		first_burst;
-};
-
-struct fw_iscsi_conn_attr {
-	__u8		hdr_digest;
-	__u8		data_digest;
-	__be32		max_rcv_dsl;
-	__be16		dst_port;
-	__be32		dst_addr;
-	__be16		src_port;
-	__be32		src_addr;
-	__be32		ping_tmo;
-};
-
-struct fw_iscsi_node_stats {
-	__be16		sess_count;
-	__be16		chap_fail_count;
-	__be16		login_count;
-	__be16		r1;
-};
-
-struct fw_iscsi_sess_stats {
-	__be32		rxbytes;
-	__be32		txbytes;
-	__be32		scmd_count;
-	__be32		read_cmds;
-	__be32		write_cmds;
-	__be32		read_bytes;
-	__be32		write_bytes;
-	__be32		scsi_err_count;
-	__be32		scsi_rst_count;
-	__be32		iscsi_tmf_count;
-	__be32		conn_count;
-};
-
-struct fw_iscsi_conn_stats {
-	__be32		txbytes;
-	__be32		rxbytes;
-	__be32		dataout;
-	__be32		datain;
-};
-
-struct fw_iscsi_node_wr {
-	__u8   opcode;
-	__u8   subop;
-	__be16 immd_len;
+ *  F O i S C S I   W O R K R E Q U E S T s
+ *********************************************/
+
+#define	FW_FOISCSI_NAME_MAX_LEN		224
+#define	FW_FOISCSI_ALIAS_MAX_LEN	224
+#define FW_FOISCSI_MAX_CHAP_NAME_LEN	64
+#define	FW_FOISCSI_INIT_NODE_MAX	8
+
+enum fw_chnet_ifconf_wr_subop {
+	FW_CHNET_IFCONF_WR_SUBOP_NONE = 0,
+	
+	FW_CHNET_IFCONF_WR_SUBOP_IPV4_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_IPV4_GET,
+	
+	FW_CHNET_IFCONF_WR_SUBOP_VLAN_IPV4_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_VLAN_IPV4_GET,
+
+	FW_CHNET_IFCONF_WR_SUBOP_IPV6_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_IPV6_GET,
+
+	FW_CHNET_IFCONF_WR_SUBOP_VLAN_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_VLAN_GET,
+
+	FW_CHNET_IFCONF_WR_SUBOP_MTU_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_MTU_GET,
+
+	FW_CHNET_IFCONF_WR_SUBOP_DHCP_SET,
+	FW_CHNET_IFCONF_WR_SUBOP_DHCP_GET,
+
+	FW_CHNET_IFCONF_WR_SUBOP_MAX,
+};
+
+struct fw_chnet_ifconf_wr {
+	__be32 op_compl;
 	__be32 flowid_len16;
 	__be64 cookie;
-	__u8   node_attr_to_compl;
+	__be32 if_flowid;
+	__u8   idx;
+	__u8   subop;
+	__u8   retval;
+	__u8   r2;
+	__be64 r3;
+	struct fw_chnet_ifconf_params {
+		__be32 r0;
+		__be16 vlanid;
+		__be16 mtu;
+		union fw_chnet_ifconf_addr_type {
+			struct fw_chnet_ifconf_ipv4 {
+				__be32 addr;
+				__be32 mask;
+				__be32 router;
+				__be32 r0;
+				__be64 r1;
+			} ipv4;
+			struct fw_chnet_ifconf_ipv6 {
+				__be64 linklocal_lo;
+				__be64 linklocal_hi;
+				__be64 router_hi;
+				__be64 router_lo;
+				__be64 aconf_hi;
+				__be64 aconf_lo;
+				__be64 linklocal_aconf_hi;
+				__be64 linklocal_aconf_lo;
+				__be64 router_aconf_hi;
+				__be64 router_aconf_lo;
+				__be64 r0;
+			} ipv6;
+		} in_attr;
+	} param;
+};
+
+enum fw_foiscsi_session_type {
+	FW_FOISCSI_SESSION_TYPE_DISCOVERY = 0,
+	FW_FOISCSI_SESSION_TYPE_NORMAL,
+};
+
+enum fw_foiscsi_auth_policy {
+	FW_FOISCSI_AUTH_POLICY_ONEWAY = 0,
+	FW_FOISCSI_AUTH_POLICY_MUTUAL,
+};
+
+enum fw_foiscsi_auth_method {
+	FW_FOISCSI_AUTH_METHOD_NONE = 0,
+	FW_FOISCSI_AUTH_METHOD_CHAP,
+	FW_FOISCSI_AUTH_METHOD_CHAP_FST,
+	FW_FOISCSI_AUTH_METHOD_CHAP_SEC,
+};
+
+enum fw_foiscsi_digest_type {
+	FW_FOISCSI_DIGEST_TYPE_NONE = 0,
+	FW_FOISCSI_DIGEST_TYPE_CRC32,
+	FW_FOISCSI_DIGEST_TYPE_CRC32_FST,
+	FW_FOISCSI_DIGEST_TYPE_CRC32_SEC,
+};
+
+enum fw_foiscsi_wr_subop {
+	FW_FOISCSI_WR_SUBOP_ADD = 1,
+	FW_FOISCSI_WR_SUBOP_DEL = 2,
+	FW_FOISCSI_WR_SUBOP_MOD = 4,
+};
+
+enum fw_foiscsi_ctrl_state {
+	FW_FOISCSI_CTRL_STATE_FREE = 0,
+	FW_FOISCSI_CTRL_STATE_ONLINE = 1,
+	FW_FOISCSI_CTRL_STATE_FAILED,
+	FW_FOISCSI_CTRL_STATE_IN_RECOVERY,
+	FW_FOISCSI_CTRL_STATE_REDIRECT,
+};
+
+struct fw_rdev_wr {
+	__be32 op_to_immdlen;
+	__be32 alloc_to_len16;
+	__be64 cookie;
+	__u8   protocol;
+	__u8   event_cause;
+	__u8   cur_state;
+	__u8   prev_state;
+	__be32 flags_to_assoc_flowid;
+	union rdev_entry {
+		struct fcoe_rdev_entry {
+			__be32 flowid;
+			__u8   protocol;
+			__u8   event_cause;
+			__u8   flags;
+			__u8   rjt_reason;
+			__u8   cur_login_st;
+			__u8   prev_login_st;
+			__be16 rcv_fr_sz;
+			__u8   rd_xfer_rdy_to_rport_type;
+			__u8   vft_to_qos;
+			__u8   org_proc_assoc_to_acc_rsp_code;
+			__u8   enh_disc_to_tgt;
+			__u8   wwnn[8];
+			__u8   wwpn[8];
+			__be16 iqid;
+			__u8   fc_oui[3];
+			__u8   r_id[3];
+		} fcoe_rdev;
+		struct iscsi_rdev_entry {
+			__be32 flowid;
+			__u8   protocol;
+			__u8   event_cause;
+			__u8   flags;
+			__u8   r3;
+			__be16 iscsi_opts;
+			__be16 tcp_opts;
+			__be16 ip_opts;
+			__be16 max_rcv_len;
+			__be16 max_snd_len;
+			__be16 first_brst_len;
+			__be16 max_brst_len;
+			__be16 r4;
+			__be16 def_time2wait;
+			__be16 def_time2ret;
+			__be16 nop_out_intrvl;
+			__be16 non_scsi_to;
+			__be16 isid;
+			__be16 tsid;
+			__be16 port;
+			__be16 tpgt;
+			__u8   r5[6];
+			__be16 iqid;
+		} iscsi_rdev;
+	} u;
+};
+
+#define S_FW_RDEV_WR_IMMDLEN	0
+#define M_FW_RDEV_WR_IMMDLEN	0xff
+#define V_FW_RDEV_WR_IMMDLEN(x)	((x) << S_FW_RDEV_WR_IMMDLEN)
+#define G_FW_RDEV_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_RDEV_WR_IMMDLEN) & M_FW_RDEV_WR_IMMDLEN)
+
+#define S_FW_RDEV_WR_ALLOC	31
+#define M_FW_RDEV_WR_ALLOC	0x1
+#define V_FW_RDEV_WR_ALLOC(x)	((x) << S_FW_RDEV_WR_ALLOC)
+#define G_FW_RDEV_WR_ALLOC(x)	\
+    (((x) >> S_FW_RDEV_WR_ALLOC) & M_FW_RDEV_WR_ALLOC)
+#define F_FW_RDEV_WR_ALLOC	V_FW_RDEV_WR_ALLOC(1U)
+
+#define S_FW_RDEV_WR_FREE	30
+#define M_FW_RDEV_WR_FREE	0x1
+#define V_FW_RDEV_WR_FREE(x)	((x) << S_FW_RDEV_WR_FREE)
+#define G_FW_RDEV_WR_FREE(x)	\
+    (((x) >> S_FW_RDEV_WR_FREE) & M_FW_RDEV_WR_FREE)
+#define F_FW_RDEV_WR_FREE	V_FW_RDEV_WR_FREE(1U)
+
+#define S_FW_RDEV_WR_MODIFY	29
+#define M_FW_RDEV_WR_MODIFY	0x1
+#define V_FW_RDEV_WR_MODIFY(x)	((x) << S_FW_RDEV_WR_MODIFY)
+#define G_FW_RDEV_WR_MODIFY(x)	\
+    (((x) >> S_FW_RDEV_WR_MODIFY) & M_FW_RDEV_WR_MODIFY)
+#define F_FW_RDEV_WR_MODIFY	V_FW_RDEV_WR_MODIFY(1U)
+
+#define S_FW_RDEV_WR_FLOWID	8
+#define M_FW_RDEV_WR_FLOWID	0xfffff
+#define V_FW_RDEV_WR_FLOWID(x)	((x) << S_FW_RDEV_WR_FLOWID)
+#define G_FW_RDEV_WR_FLOWID(x)	\
+    (((x) >> S_FW_RDEV_WR_FLOWID) & M_FW_RDEV_WR_FLOWID)
+
+#define S_FW_RDEV_WR_LEN16	0
+#define M_FW_RDEV_WR_LEN16	0xff
+#define V_FW_RDEV_WR_LEN16(x)	((x) << S_FW_RDEV_WR_LEN16)
+#define G_FW_RDEV_WR_LEN16(x)	\
+    (((x) >> S_FW_RDEV_WR_LEN16) & M_FW_RDEV_WR_LEN16)
+
+#define S_FW_RDEV_WR_FLAGS	24
+#define M_FW_RDEV_WR_FLAGS	0xff
+#define V_FW_RDEV_WR_FLAGS(x)	((x) << S_FW_RDEV_WR_FLAGS)
+#define G_FW_RDEV_WR_FLAGS(x)	\
+    (((x) >> S_FW_RDEV_WR_FLAGS) & M_FW_RDEV_WR_FLAGS)
+
+#define S_FW_RDEV_WR_GET_NEXT		20
+#define M_FW_RDEV_WR_GET_NEXT		0xf
+#define V_FW_RDEV_WR_GET_NEXT(x)	((x) << S_FW_RDEV_WR_GET_NEXT)
+#define G_FW_RDEV_WR_GET_NEXT(x)	\
+    (((x) >> S_FW_RDEV_WR_GET_NEXT) & M_FW_RDEV_WR_GET_NEXT)
+
+#define S_FW_RDEV_WR_ASSOC_FLOWID	0
+#define M_FW_RDEV_WR_ASSOC_FLOWID	0xfffff
+#define V_FW_RDEV_WR_ASSOC_FLOWID(x)	((x) << S_FW_RDEV_WR_ASSOC_FLOWID)
+#define G_FW_RDEV_WR_ASSOC_FLOWID(x)	\
+    (((x) >> S_FW_RDEV_WR_ASSOC_FLOWID) & M_FW_RDEV_WR_ASSOC_FLOWID)
+
+#define S_FW_RDEV_WR_RJT	7
+#define M_FW_RDEV_WR_RJT	0x1
+#define V_FW_RDEV_WR_RJT(x)	((x) << S_FW_RDEV_WR_RJT)
+#define G_FW_RDEV_WR_RJT(x)	(((x) >> S_FW_RDEV_WR_RJT) & M_FW_RDEV_WR_RJT)
+#define F_FW_RDEV_WR_RJT	V_FW_RDEV_WR_RJT(1U)
+
+#define S_FW_RDEV_WR_REASON	0
+#define M_FW_RDEV_WR_REASON	0x7f
+#define V_FW_RDEV_WR_REASON(x)	((x) << S_FW_RDEV_WR_REASON)
+#define G_FW_RDEV_WR_REASON(x)	\
+    (((x) >> S_FW_RDEV_WR_REASON) & M_FW_RDEV_WR_REASON)
+
+#define S_FW_RDEV_WR_RD_XFER_RDY	7
+#define M_FW_RDEV_WR_RD_XFER_RDY	0x1
+#define V_FW_RDEV_WR_RD_XFER_RDY(x)	((x) << S_FW_RDEV_WR_RD_XFER_RDY)
+#define G_FW_RDEV_WR_RD_XFER_RDY(x)	\
+    (((x) >> S_FW_RDEV_WR_RD_XFER_RDY) & M_FW_RDEV_WR_RD_XFER_RDY)
+#define F_FW_RDEV_WR_RD_XFER_RDY	V_FW_RDEV_WR_RD_XFER_RDY(1U)
+
+#define S_FW_RDEV_WR_WR_XFER_RDY	6
+#define M_FW_RDEV_WR_WR_XFER_RDY	0x1
+#define V_FW_RDEV_WR_WR_XFER_RDY(x)	((x) << S_FW_RDEV_WR_WR_XFER_RDY)
+#define G_FW_RDEV_WR_WR_XFER_RDY(x)	\
+    (((x) >> S_FW_RDEV_WR_WR_XFER_RDY) & M_FW_RDEV_WR_WR_XFER_RDY)
+#define F_FW_RDEV_WR_WR_XFER_RDY	V_FW_RDEV_WR_WR_XFER_RDY(1U)
+
+#define S_FW_RDEV_WR_FC_SP	5
+#define M_FW_RDEV_WR_FC_SP	0x1
+#define V_FW_RDEV_WR_FC_SP(x)	((x) << S_FW_RDEV_WR_FC_SP)
+#define G_FW_RDEV_WR_FC_SP(x)	\
+    (((x) >> S_FW_RDEV_WR_FC_SP) & M_FW_RDEV_WR_FC_SP)
+#define F_FW_RDEV_WR_FC_SP	V_FW_RDEV_WR_FC_SP(1U)
+
+#define S_FW_RDEV_WR_RPORT_TYPE		0
+#define M_FW_RDEV_WR_RPORT_TYPE		0x1f
+#define V_FW_RDEV_WR_RPORT_TYPE(x)	((x) << S_FW_RDEV_WR_RPORT_TYPE)
+#define G_FW_RDEV_WR_RPORT_TYPE(x)	\
+    (((x) >> S_FW_RDEV_WR_RPORT_TYPE) & M_FW_RDEV_WR_RPORT_TYPE)
+
+#define S_FW_RDEV_WR_VFT	7
+#define M_FW_RDEV_WR_VFT	0x1
+#define V_FW_RDEV_WR_VFT(x)	((x) << S_FW_RDEV_WR_VFT)
+#define G_FW_RDEV_WR_VFT(x)	(((x) >> S_FW_RDEV_WR_VFT) & M_FW_RDEV_WR_VFT)
+#define F_FW_RDEV_WR_VFT	V_FW_RDEV_WR_VFT(1U)
+
+#define S_FW_RDEV_WR_NPIV	6
+#define M_FW_RDEV_WR_NPIV	0x1
+#define V_FW_RDEV_WR_NPIV(x)	((x) << S_FW_RDEV_WR_NPIV)
+#define G_FW_RDEV_WR_NPIV(x)	\
+    (((x) >> S_FW_RDEV_WR_NPIV) & M_FW_RDEV_WR_NPIV)
+#define F_FW_RDEV_WR_NPIV	V_FW_RDEV_WR_NPIV(1U)
+
+#define S_FW_RDEV_WR_CLASS	4
+#define M_FW_RDEV_WR_CLASS	0x3
+#define V_FW_RDEV_WR_CLASS(x)	((x) << S_FW_RDEV_WR_CLASS)
+#define G_FW_RDEV_WR_CLASS(x)	\
+    (((x) >> S_FW_RDEV_WR_CLASS) & M_FW_RDEV_WR_CLASS)
+
+#define S_FW_RDEV_WR_SEQ_DEL	3
+#define M_FW_RDEV_WR_SEQ_DEL	0x1
+#define V_FW_RDEV_WR_SEQ_DEL(x)	((x) << S_FW_RDEV_WR_SEQ_DEL)
+#define G_FW_RDEV_WR_SEQ_DEL(x)	\
+    (((x) >> S_FW_RDEV_WR_SEQ_DEL) & M_FW_RDEV_WR_SEQ_DEL)
+#define F_FW_RDEV_WR_SEQ_DEL	V_FW_RDEV_WR_SEQ_DEL(1U)
+
+#define S_FW_RDEV_WR_PRIO_PREEMP	2
+#define M_FW_RDEV_WR_PRIO_PREEMP	0x1
+#define V_FW_RDEV_WR_PRIO_PREEMP(x)	((x) << S_FW_RDEV_WR_PRIO_PREEMP)
+#define G_FW_RDEV_WR_PRIO_PREEMP(x)	\
+    (((x) >> S_FW_RDEV_WR_PRIO_PREEMP) & M_FW_RDEV_WR_PRIO_PREEMP)
+#define F_FW_RDEV_WR_PRIO_PREEMP	V_FW_RDEV_WR_PRIO_PREEMP(1U)
+
+#define S_FW_RDEV_WR_PREF	1
+#define M_FW_RDEV_WR_PREF	0x1
+#define V_FW_RDEV_WR_PREF(x)	((x) << S_FW_RDEV_WR_PREF)
+#define G_FW_RDEV_WR_PREF(x)	\
+    (((x) >> S_FW_RDEV_WR_PREF) & M_FW_RDEV_WR_PREF)
+#define F_FW_RDEV_WR_PREF	V_FW_RDEV_WR_PREF(1U)
+
+#define S_FW_RDEV_WR_QOS	0
+#define M_FW_RDEV_WR_QOS	0x1
+#define V_FW_RDEV_WR_QOS(x)	((x) << S_FW_RDEV_WR_QOS)
+#define G_FW_RDEV_WR_QOS(x)	(((x) >> S_FW_RDEV_WR_QOS) & M_FW_RDEV_WR_QOS)
+#define F_FW_RDEV_WR_QOS	V_FW_RDEV_WR_QOS(1U)
+
+#define S_FW_RDEV_WR_ORG_PROC_ASSOC	7
+#define M_FW_RDEV_WR_ORG_PROC_ASSOC	0x1
+#define V_FW_RDEV_WR_ORG_PROC_ASSOC(x)	((x) << S_FW_RDEV_WR_ORG_PROC_ASSOC)
+#define G_FW_RDEV_WR_ORG_PROC_ASSOC(x)	\
+    (((x) >> S_FW_RDEV_WR_ORG_PROC_ASSOC) & M_FW_RDEV_WR_ORG_PROC_ASSOC)
+#define F_FW_RDEV_WR_ORG_PROC_ASSOC	V_FW_RDEV_WR_ORG_PROC_ASSOC(1U)
+
+#define S_FW_RDEV_WR_RSP_PROC_ASSOC	6
+#define M_FW_RDEV_WR_RSP_PROC_ASSOC	0x1
+#define V_FW_RDEV_WR_RSP_PROC_ASSOC(x)	((x) << S_FW_RDEV_WR_RSP_PROC_ASSOC)
+#define G_FW_RDEV_WR_RSP_PROC_ASSOC(x)	\
+    (((x) >> S_FW_RDEV_WR_RSP_PROC_ASSOC) & M_FW_RDEV_WR_RSP_PROC_ASSOC)
+#define F_FW_RDEV_WR_RSP_PROC_ASSOC	V_FW_RDEV_WR_RSP_PROC_ASSOC(1U)
+
+#define S_FW_RDEV_WR_IMAGE_PAIR		5
+#define M_FW_RDEV_WR_IMAGE_PAIR		0x1
+#define V_FW_RDEV_WR_IMAGE_PAIR(x)	((x) << S_FW_RDEV_WR_IMAGE_PAIR)
+#define G_FW_RDEV_WR_IMAGE_PAIR(x)	\
+    (((x) >> S_FW_RDEV_WR_IMAGE_PAIR) & M_FW_RDEV_WR_IMAGE_PAIR)
+#define F_FW_RDEV_WR_IMAGE_PAIR	V_FW_RDEV_WR_IMAGE_PAIR(1U)
+
+#define S_FW_RDEV_WR_ACC_RSP_CODE	0
+#define M_FW_RDEV_WR_ACC_RSP_CODE	0x1f
+#define V_FW_RDEV_WR_ACC_RSP_CODE(x)	((x) << S_FW_RDEV_WR_ACC_RSP_CODE)
+#define G_FW_RDEV_WR_ACC_RSP_CODE(x)	\
+    (((x) >> S_FW_RDEV_WR_ACC_RSP_CODE) & M_FW_RDEV_WR_ACC_RSP_CODE)
+
+#define S_FW_RDEV_WR_ENH_DISC		7
+#define M_FW_RDEV_WR_ENH_DISC		0x1
+#define V_FW_RDEV_WR_ENH_DISC(x)	((x) << S_FW_RDEV_WR_ENH_DISC)
+#define G_FW_RDEV_WR_ENH_DISC(x)	\
+    (((x) >> S_FW_RDEV_WR_ENH_DISC) & M_FW_RDEV_WR_ENH_DISC)
+#define F_FW_RDEV_WR_ENH_DISC	V_FW_RDEV_WR_ENH_DISC(1U)
+
+#define S_FW_RDEV_WR_REC	6
+#define M_FW_RDEV_WR_REC	0x1
+#define V_FW_RDEV_WR_REC(x)	((x) << S_FW_RDEV_WR_REC)
+#define G_FW_RDEV_WR_REC(x)	(((x) >> S_FW_RDEV_WR_REC) & M_FW_RDEV_WR_REC)
+#define F_FW_RDEV_WR_REC	V_FW_RDEV_WR_REC(1U)
+
+#define S_FW_RDEV_WR_TASK_RETRY_ID	5
+#define M_FW_RDEV_WR_TASK_RETRY_ID	0x1
+#define V_FW_RDEV_WR_TASK_RETRY_ID(x)	((x) << S_FW_RDEV_WR_TASK_RETRY_ID)
+#define G_FW_RDEV_WR_TASK_RETRY_ID(x)	\
+    (((x) >> S_FW_RDEV_WR_TASK_RETRY_ID) & M_FW_RDEV_WR_TASK_RETRY_ID)
+#define F_FW_RDEV_WR_TASK_RETRY_ID	V_FW_RDEV_WR_TASK_RETRY_ID(1U)
+
+#define S_FW_RDEV_WR_RETRY	4
+#define M_FW_RDEV_WR_RETRY	0x1
+#define V_FW_RDEV_WR_RETRY(x)	((x) << S_FW_RDEV_WR_RETRY)
+#define G_FW_RDEV_WR_RETRY(x)	\
+    (((x) >> S_FW_RDEV_WR_RETRY) & M_FW_RDEV_WR_RETRY)
+#define F_FW_RDEV_WR_RETRY	V_FW_RDEV_WR_RETRY(1U)
+
+#define S_FW_RDEV_WR_CONF_CMPL		3
+#define M_FW_RDEV_WR_CONF_CMPL		0x1
+#define V_FW_RDEV_WR_CONF_CMPL(x)	((x) << S_FW_RDEV_WR_CONF_CMPL)
+#define G_FW_RDEV_WR_CONF_CMPL(x)	\
+    (((x) >> S_FW_RDEV_WR_CONF_CMPL) & M_FW_RDEV_WR_CONF_CMPL)
+#define F_FW_RDEV_WR_CONF_CMPL	V_FW_RDEV_WR_CONF_CMPL(1U)
+
+#define S_FW_RDEV_WR_DATA_OVLY		2
+#define M_FW_RDEV_WR_DATA_OVLY		0x1
+#define V_FW_RDEV_WR_DATA_OVLY(x)	((x) << S_FW_RDEV_WR_DATA_OVLY)
+#define G_FW_RDEV_WR_DATA_OVLY(x)	\
+    (((x) >> S_FW_RDEV_WR_DATA_OVLY) & M_FW_RDEV_WR_DATA_OVLY)
+#define F_FW_RDEV_WR_DATA_OVLY	V_FW_RDEV_WR_DATA_OVLY(1U)
+
+#define S_FW_RDEV_WR_INI	1
+#define M_FW_RDEV_WR_INI	0x1
+#define V_FW_RDEV_WR_INI(x)	((x) << S_FW_RDEV_WR_INI)
+#define G_FW_RDEV_WR_INI(x)	(((x) >> S_FW_RDEV_WR_INI) & M_FW_RDEV_WR_INI)
+#define F_FW_RDEV_WR_INI	V_FW_RDEV_WR_INI(1U)
+
+#define S_FW_RDEV_WR_TGT	0
+#define M_FW_RDEV_WR_TGT	0x1
+#define V_FW_RDEV_WR_TGT(x)	((x) << S_FW_RDEV_WR_TGT)
+#define G_FW_RDEV_WR_TGT(x)	(((x) >> S_FW_RDEV_WR_TGT) & M_FW_RDEV_WR_TGT)
+#define F_FW_RDEV_WR_TGT	V_FW_RDEV_WR_TGT(1U)
+
+struct fw_foiscsi_node_wr {
+	__be32 op_to_immdlen;
+	__be32 flowid_len16;
+	__u64  cookie;
+	__u8   subop;
 	__u8   status;
-	__be16 r1;
+	__u8   alias_len;
+	__u8   iqn_len;
+	__be32 node_flowid;
+	__be16 nodeid;
+	__be16 login_retry;
+	__be16 retry_timeout;
+	__be16 r3;
+	__u8   iqn[224];
+	__u8   alias[224];
+};
+
+#define S_FW_FOISCSI_NODE_WR_IMMDLEN	0
+#define M_FW_FOISCSI_NODE_WR_IMMDLEN	0xffff
+#define V_FW_FOISCSI_NODE_WR_IMMDLEN(x)	((x) << S_FW_FOISCSI_NODE_WR_IMMDLEN)
+#define G_FW_FOISCSI_NODE_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_FOISCSI_NODE_WR_IMMDLEN) & M_FW_FOISCSI_NODE_WR_IMMDLEN)
+
+struct fw_foiscsi_ctrl_wr {
+	__be32 op_compl;
+	__be32 flowid_len16;
+	__u64  cookie;
+	__u8   subop;
+	__u8   status;
+	__u8   ctrl_state;
+	__u8   io_state;
 	__be32 node_id;
-	__be32 ctrl_handle;
-	__be32 io_handle;
-};
-
-#define S_FW_ISCSI_NODE_WR_FLOWID	8
-#define M_FW_ISCSI_NODE_WR_FLOWID	0xfffff
-#define V_FW_ISCSI_NODE_WR_FLOWID(x)	((x) << S_FW_ISCSI_NODE_WR_FLOWID)
-#define G_FW_ISCSI_NODE_WR_FLOWID(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_FLOWID) & M_FW_ISCSI_NODE_WR_FLOWID)
-
-#define S_FW_ISCSI_NODE_WR_LEN16	0
-#define M_FW_ISCSI_NODE_WR_LEN16	0xff
-#define V_FW_ISCSI_NODE_WR_LEN16(x)	((x) << S_FW_ISCSI_NODE_WR_LEN16)
-#define G_FW_ISCSI_NODE_WR_LEN16(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_LEN16) & M_FW_ISCSI_NODE_WR_LEN16)
-
-#define S_FW_ISCSI_NODE_WR_NODE_ATTR	7
-#define M_FW_ISCSI_NODE_WR_NODE_ATTR	0x1
-#define V_FW_ISCSI_NODE_WR_NODE_ATTR(x)	((x) << S_FW_ISCSI_NODE_WR_NODE_ATTR)
-#define G_FW_ISCSI_NODE_WR_NODE_ATTR(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_NODE_ATTR) & M_FW_ISCSI_NODE_WR_NODE_ATTR)
-#define F_FW_ISCSI_NODE_WR_NODE_ATTR	V_FW_ISCSI_NODE_WR_NODE_ATTR(1U)
-
-#define S_FW_ISCSI_NODE_WR_SESS_ATTR	6
-#define M_FW_ISCSI_NODE_WR_SESS_ATTR	0x1
-#define V_FW_ISCSI_NODE_WR_SESS_ATTR(x)	((x) << S_FW_ISCSI_NODE_WR_SESS_ATTR)
-#define G_FW_ISCSI_NODE_WR_SESS_ATTR(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_SESS_ATTR) & M_FW_ISCSI_NODE_WR_SESS_ATTR)
-#define F_FW_ISCSI_NODE_WR_SESS_ATTR	V_FW_ISCSI_NODE_WR_SESS_ATTR(1U)
-
-#define S_FW_ISCSI_NODE_WR_CONN_ATTR	5
-#define M_FW_ISCSI_NODE_WR_CONN_ATTR	0x1
-#define V_FW_ISCSI_NODE_WR_CONN_ATTR(x)	((x) << S_FW_ISCSI_NODE_WR_CONN_ATTR)
-#define G_FW_ISCSI_NODE_WR_CONN_ATTR(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_CONN_ATTR) & M_FW_ISCSI_NODE_WR_CONN_ATTR)
-#define F_FW_ISCSI_NODE_WR_CONN_ATTR	V_FW_ISCSI_NODE_WR_CONN_ATTR(1U)
-
-#define S_FW_ISCSI_NODE_WR_TGT_ATTR	4
-#define M_FW_ISCSI_NODE_WR_TGT_ATTR	0x1
-#define V_FW_ISCSI_NODE_WR_TGT_ATTR(x)	((x) << S_FW_ISCSI_NODE_WR_TGT_ATTR)
-#define G_FW_ISCSI_NODE_WR_TGT_ATTR(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_TGT_ATTR) & M_FW_ISCSI_NODE_WR_TGT_ATTR)
-#define F_FW_ISCSI_NODE_WR_TGT_ATTR	V_FW_ISCSI_NODE_WR_TGT_ATTR(1U)
-
-#define S_FW_ISCSI_NODE_WR_NODE_TYPE	3
-#define M_FW_ISCSI_NODE_WR_NODE_TYPE	0x1
-#define V_FW_ISCSI_NODE_WR_NODE_TYPE(x)	((x) << S_FW_ISCSI_NODE_WR_NODE_TYPE)
-#define G_FW_ISCSI_NODE_WR_NODE_TYPE(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_NODE_TYPE) & M_FW_ISCSI_NODE_WR_NODE_TYPE)
-#define F_FW_ISCSI_NODE_WR_NODE_TYPE	V_FW_ISCSI_NODE_WR_NODE_TYPE(1U)
-
-#define S_FW_ISCSI_NODE_WR_COMPL	0
-#define M_FW_ISCSI_NODE_WR_COMPL	0x1
-#define V_FW_ISCSI_NODE_WR_COMPL(x)	((x) << S_FW_ISCSI_NODE_WR_COMPL)
-#define G_FW_ISCSI_NODE_WR_COMPL(x)	\
-    (((x) >> S_FW_ISCSI_NODE_WR_COMPL) & M_FW_ISCSI_NODE_WR_COMPL)
-#define F_FW_ISCSI_NODE_WR_COMPL	V_FW_ISCSI_NODE_WR_COMPL(1U)
-
-#define FW_ISCSI_NODE_INVALID_ID	0xffffffff
-
-struct fw_scsi_iscsi_data {
-	__u8   r0;
-	__u8   fbit_to_tattr;
-	__be16 r2;
-	__be32 r3;
-	__u8   lun[8];
+	__be32 ctrl_id;
+	__be32 io_id;
+	struct fw_foiscsi_sess_attr {
+		__be32 sess_type_to_erl;
+		__be16 max_conn;
+		__be16 max_r2t;
+		__be16 time2wait;
+		__be16 time2retain;
+		__be32 max_burst;
+		__be32 first_burst;
+		__be32 r1;
+	} sess_attr;
+	struct fw_foiscsi_conn_attr {
+		__be32 hdigest_to_auth_policy;
+		__be32 max_rcv_dsl;
+		__be32 ping_tmo;
+		__be16 dst_port;
+		__be16 src_port;
+		union fw_foiscsi_conn_attr_addr {
+			struct fw_foiscsi_conn_attr_ipv6 {
+				__be64 dst_addr[2];
+				__be64 src_addr[2];
+			} ipv6_addr;
+			struct fw_foiscsi_conn_attr_ipv4 {
+				__be32 dst_addr;
+				__be32 src_addr;
+			} ipv4_addr;
+		} u;
+	} conn_attr;
+	__u8   tgt_name_len;
+	__u8   r3[7];
+	__u8   tgt_name[224];
+};
+
+#define S_FW_FOISCSI_CTRL_WR_SESS_TYPE		30
+#define M_FW_FOISCSI_CTRL_WR_SESS_TYPE		0x3
+#define V_FW_FOISCSI_CTRL_WR_SESS_TYPE(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_SESS_TYPE)
+#define G_FW_FOISCSI_CTRL_WR_SESS_TYPE(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_SESS_TYPE) & M_FW_FOISCSI_CTRL_WR_SESS_TYPE)
+
+#define S_FW_FOISCSI_CTRL_WR_SEQ_INORDER	29
+#define M_FW_FOISCSI_CTRL_WR_SEQ_INORDER	0x1
+#define V_FW_FOISCSI_CTRL_WR_SEQ_INORDER(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_SEQ_INORDER)
+#define G_FW_FOISCSI_CTRL_WR_SEQ_INORDER(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_SEQ_INORDER) & \
+     M_FW_FOISCSI_CTRL_WR_SEQ_INORDER)
+#define F_FW_FOISCSI_CTRL_WR_SEQ_INORDER	\
+    V_FW_FOISCSI_CTRL_WR_SEQ_INORDER(1U)
+
+#define S_FW_FOISCSI_CTRL_WR_PDU_INORDER	28
+#define M_FW_FOISCSI_CTRL_WR_PDU_INORDER	0x1
+#define V_FW_FOISCSI_CTRL_WR_PDU_INORDER(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_PDU_INORDER)
+#define G_FW_FOISCSI_CTRL_WR_PDU_INORDER(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_PDU_INORDER) & \
+     M_FW_FOISCSI_CTRL_WR_PDU_INORDER)
+#define F_FW_FOISCSI_CTRL_WR_PDU_INORDER	\
+    V_FW_FOISCSI_CTRL_WR_PDU_INORDER(1U)
+
+#define S_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN	27
+#define M_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN	0x1
+#define V_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN)
+#define G_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN) & \
+     M_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN)
+#define F_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN	\
+    V_FW_FOISCSI_CTRL_WR_IMMD_DATA_EN(1U)
+
+#define S_FW_FOISCSI_CTRL_WR_INIT_R2T_EN	26
+#define M_FW_FOISCSI_CTRL_WR_INIT_R2T_EN	0x1
+#define V_FW_FOISCSI_CTRL_WR_INIT_R2T_EN(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_INIT_R2T_EN)
+#define G_FW_FOISCSI_CTRL_WR_INIT_R2T_EN(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_INIT_R2T_EN) & \
+     M_FW_FOISCSI_CTRL_WR_INIT_R2T_EN)
+#define F_FW_FOISCSI_CTRL_WR_INIT_R2T_EN	\
+    V_FW_FOISCSI_CTRL_WR_INIT_R2T_EN(1U)
+
+#define S_FW_FOISCSI_CTRL_WR_ERL	24
+#define M_FW_FOISCSI_CTRL_WR_ERL	0x3
+#define V_FW_FOISCSI_CTRL_WR_ERL(x)	((x) << S_FW_FOISCSI_CTRL_WR_ERL)
+#define G_FW_FOISCSI_CTRL_WR_ERL(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_ERL) & M_FW_FOISCSI_CTRL_WR_ERL)
+
+#define S_FW_FOISCSI_CTRL_WR_HDIGEST	30
+#define M_FW_FOISCSI_CTRL_WR_HDIGEST	0x3
+#define V_FW_FOISCSI_CTRL_WR_HDIGEST(x)	((x) << S_FW_FOISCSI_CTRL_WR_HDIGEST)
+#define G_FW_FOISCSI_CTRL_WR_HDIGEST(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_HDIGEST) & M_FW_FOISCSI_CTRL_WR_HDIGEST)
+
+#define S_FW_FOISCSI_CTRL_WR_DDIGEST	28
+#define M_FW_FOISCSI_CTRL_WR_DDIGEST	0x3
+#define V_FW_FOISCSI_CTRL_WR_DDIGEST(x)	((x) << S_FW_FOISCSI_CTRL_WR_DDIGEST)
+#define G_FW_FOISCSI_CTRL_WR_DDIGEST(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_DDIGEST) & M_FW_FOISCSI_CTRL_WR_DDIGEST)
+
+#define S_FW_FOISCSI_CTRL_WR_AUTH_METHOD	25
+#define M_FW_FOISCSI_CTRL_WR_AUTH_METHOD	0x7
+#define V_FW_FOISCSI_CTRL_WR_AUTH_METHOD(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_AUTH_METHOD)
+#define G_FW_FOISCSI_CTRL_WR_AUTH_METHOD(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_AUTH_METHOD) & \
+     M_FW_FOISCSI_CTRL_WR_AUTH_METHOD)
+
+#define S_FW_FOISCSI_CTRL_WR_AUTH_POLICY	23
+#define M_FW_FOISCSI_CTRL_WR_AUTH_POLICY	0x3
+#define V_FW_FOISCSI_CTRL_WR_AUTH_POLICY(x)	\
+    ((x) << S_FW_FOISCSI_CTRL_WR_AUTH_POLICY)
+#define G_FW_FOISCSI_CTRL_WR_AUTH_POLICY(x)	\
+    (((x) >> S_FW_FOISCSI_CTRL_WR_AUTH_POLICY) & \
+     M_FW_FOISCSI_CTRL_WR_AUTH_POLICY)
+
+struct fw_foiscsi_chap_wr {
+	__be32 op_compl;
+	__be32 flowid_len16;
+	__u64  cookie;
+	__u8   status;
+	__u8   id_len;
+	__u8   sec_len;
+	__u8   tgt_id_len;
+	__u8   tgt_sec_len;
+	__be16 node_id;
+	__u8   r2;
+	__u8   chap_id[64];
+	__u8   chap_sec[16];
+	__u8   tgt_id[64];
+	__u8   tgt_sec[16];
+};
+
+/******************************************************************************
+ *  F O F C O E   W O R K R E Q U E S T s
+ *****************************************************************************/
+
+struct fw_fcoe_els_ct_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   tmo_val;
+	__u8   els_ct_type;
+	__u8   ctl_pri;
+	__u8   cp_en_class;
+	__be16 xfer_cnt;
+	__u8   fl_to_sp;
+	__u8   l_id[3];
+	__u8   r5;
+	__u8   r_id[3];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r6;
+};
+
+#define S_FW_FCOE_ELS_CT_WR_OPCODE	24
+#define M_FW_FCOE_ELS_CT_WR_OPCODE	0xff
+#define V_FW_FCOE_ELS_CT_WR_OPCODE(x)	((x) << S_FW_FCOE_ELS_CT_WR_OPCODE)
+#define G_FW_FCOE_ELS_CT_WR_OPCODE(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_OPCODE) & M_FW_FCOE_ELS_CT_WR_OPCODE)
+
+#define S_FW_FCOE_ELS_CT_WR_IMMDLEN	0
+#define M_FW_FCOE_ELS_CT_WR_IMMDLEN	0xff
+#define V_FW_FCOE_ELS_CT_WR_IMMDLEN(x)	((x) << S_FW_FCOE_ELS_CT_WR_IMMDLEN)
+#define G_FW_FCOE_ELS_CT_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_IMMDLEN) & M_FW_FCOE_ELS_CT_WR_IMMDLEN)
+
+#define S_FW_FCOE_ELS_CT_WR_FLOWID	8
+#define M_FW_FCOE_ELS_CT_WR_FLOWID	0xfffff
+#define V_FW_FCOE_ELS_CT_WR_FLOWID(x)	((x) << S_FW_FCOE_ELS_CT_WR_FLOWID)
+#define G_FW_FCOE_ELS_CT_WR_FLOWID(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_FLOWID) & M_FW_FCOE_ELS_CT_WR_FLOWID)
+
+#define S_FW_FCOE_ELS_CT_WR_LEN16	0
+#define M_FW_FCOE_ELS_CT_WR_LEN16	0xff
+#define V_FW_FCOE_ELS_CT_WR_LEN16(x)	((x) << S_FW_FCOE_ELS_CT_WR_LEN16)
+#define G_FW_FCOE_ELS_CT_WR_LEN16(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_LEN16) & M_FW_FCOE_ELS_CT_WR_LEN16)
+
+#define S_FW_FCOE_ELS_CT_WR_CP_EN	6
+#define M_FW_FCOE_ELS_CT_WR_CP_EN	0x3
+#define V_FW_FCOE_ELS_CT_WR_CP_EN(x)	((x) << S_FW_FCOE_ELS_CT_WR_CP_EN)
+#define G_FW_FCOE_ELS_CT_WR_CP_EN(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_CP_EN) & M_FW_FCOE_ELS_CT_WR_CP_EN)
+
+#define S_FW_FCOE_ELS_CT_WR_CLASS	4
+#define M_FW_FCOE_ELS_CT_WR_CLASS	0x3
+#define V_FW_FCOE_ELS_CT_WR_CLASS(x)	((x) << S_FW_FCOE_ELS_CT_WR_CLASS)
+#define G_FW_FCOE_ELS_CT_WR_CLASS(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_CLASS) & M_FW_FCOE_ELS_CT_WR_CLASS)
+
+#define S_FW_FCOE_ELS_CT_WR_FL		2
+#define M_FW_FCOE_ELS_CT_WR_FL		0x1
+#define V_FW_FCOE_ELS_CT_WR_FL(x)	((x) << S_FW_FCOE_ELS_CT_WR_FL)
+#define G_FW_FCOE_ELS_CT_WR_FL(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_FL) & M_FW_FCOE_ELS_CT_WR_FL)
+#define F_FW_FCOE_ELS_CT_WR_FL	V_FW_FCOE_ELS_CT_WR_FL(1U)
+
+#define S_FW_FCOE_ELS_CT_WR_NPIV	1
+#define M_FW_FCOE_ELS_CT_WR_NPIV	0x1
+#define V_FW_FCOE_ELS_CT_WR_NPIV(x)	((x) << S_FW_FCOE_ELS_CT_WR_NPIV)
+#define G_FW_FCOE_ELS_CT_WR_NPIV(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_NPIV) & M_FW_FCOE_ELS_CT_WR_NPIV)
+#define F_FW_FCOE_ELS_CT_WR_NPIV	V_FW_FCOE_ELS_CT_WR_NPIV(1U)
+
+#define S_FW_FCOE_ELS_CT_WR_SP		0
+#define M_FW_FCOE_ELS_CT_WR_SP		0x1
+#define V_FW_FCOE_ELS_CT_WR_SP(x)	((x) << S_FW_FCOE_ELS_CT_WR_SP)
+#define G_FW_FCOE_ELS_CT_WR_SP(x)	\
+    (((x) >> S_FW_FCOE_ELS_CT_WR_SP) & M_FW_FCOE_ELS_CT_WR_SP)
+#define F_FW_FCOE_ELS_CT_WR_SP	V_FW_FCOE_ELS_CT_WR_SP(1U)
+
+/******************************************************************************
+ *  S C S I   W O R K R E Q U E S T s   (FOiSCSI and FCOE unified data path)
+ *****************************************************************************/
+
+struct fw_scsi_write_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   tmo_val;
+	__u8   use_xfer_cnt;
+	union fw_scsi_write_priv {
+		struct fcoe_write_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_write_priv {
+			__u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
 	__be32 r4;
-	__be32 dlen;
-	__be32 r5;
+};
+
+#define S_FW_SCSI_WRITE_WR_OPCODE	24
+#define M_FW_SCSI_WRITE_WR_OPCODE	0xff
+#define V_FW_SCSI_WRITE_WR_OPCODE(x)	((x) << S_FW_SCSI_WRITE_WR_OPCODE)
+#define G_FW_SCSI_WRITE_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_OPCODE) & M_FW_SCSI_WRITE_WR_OPCODE)
+
+#define S_FW_SCSI_WRITE_WR_IMMDLEN	0
+#define M_FW_SCSI_WRITE_WR_IMMDLEN	0xff
+#define V_FW_SCSI_WRITE_WR_IMMDLEN(x)	((x) << S_FW_SCSI_WRITE_WR_IMMDLEN)
+#define G_FW_SCSI_WRITE_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_IMMDLEN) & M_FW_SCSI_WRITE_WR_IMMDLEN)
+
+#define S_FW_SCSI_WRITE_WR_FLOWID	8
+#define M_FW_SCSI_WRITE_WR_FLOWID	0xfffff
+#define V_FW_SCSI_WRITE_WR_FLOWID(x)	((x) << S_FW_SCSI_WRITE_WR_FLOWID)
+#define G_FW_SCSI_WRITE_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_FLOWID) & M_FW_SCSI_WRITE_WR_FLOWID)
+
+#define S_FW_SCSI_WRITE_WR_LEN16	0
+#define M_FW_SCSI_WRITE_WR_LEN16	0xff
+#define V_FW_SCSI_WRITE_WR_LEN16(x)	((x) << S_FW_SCSI_WRITE_WR_LEN16)
+#define G_FW_SCSI_WRITE_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_LEN16) & M_FW_SCSI_WRITE_WR_LEN16)
+
+#define S_FW_SCSI_WRITE_WR_CP_EN	6
+#define M_FW_SCSI_WRITE_WR_CP_EN	0x3
+#define V_FW_SCSI_WRITE_WR_CP_EN(x)	((x) << S_FW_SCSI_WRITE_WR_CP_EN)
+#define G_FW_SCSI_WRITE_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_CP_EN) & M_FW_SCSI_WRITE_WR_CP_EN)
+
+#define S_FW_SCSI_WRITE_WR_CLASS	4
+#define M_FW_SCSI_WRITE_WR_CLASS	0x3
+#define V_FW_SCSI_WRITE_WR_CLASS(x)	((x) << S_FW_SCSI_WRITE_WR_CLASS)
+#define G_FW_SCSI_WRITE_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_WRITE_WR_CLASS) & M_FW_SCSI_WRITE_WR_CLASS)
+
+struct fw_scsi_read_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   tmo_val;
+	__u8   use_xfer_cnt;
+	union fw_scsi_read_priv {
+		struct fcoe_read_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_read_priv {
+			__u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r4;
+};
+
+#define S_FW_SCSI_READ_WR_OPCODE	24
+#define M_FW_SCSI_READ_WR_OPCODE	0xff
+#define V_FW_SCSI_READ_WR_OPCODE(x)	((x) << S_FW_SCSI_READ_WR_OPCODE)
+#define G_FW_SCSI_READ_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_OPCODE) & M_FW_SCSI_READ_WR_OPCODE)
+
+#define S_FW_SCSI_READ_WR_IMMDLEN	0
+#define M_FW_SCSI_READ_WR_IMMDLEN	0xff
+#define V_FW_SCSI_READ_WR_IMMDLEN(x)	((x) << S_FW_SCSI_READ_WR_IMMDLEN)
+#define G_FW_SCSI_READ_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_IMMDLEN) & M_FW_SCSI_READ_WR_IMMDLEN)
+
+#define S_FW_SCSI_READ_WR_FLOWID	8
+#define M_FW_SCSI_READ_WR_FLOWID	0xfffff
+#define V_FW_SCSI_READ_WR_FLOWID(x)	((x) << S_FW_SCSI_READ_WR_FLOWID)
+#define G_FW_SCSI_READ_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_FLOWID) & M_FW_SCSI_READ_WR_FLOWID)
+
+#define S_FW_SCSI_READ_WR_LEN16		0
+#define M_FW_SCSI_READ_WR_LEN16		0xff
+#define V_FW_SCSI_READ_WR_LEN16(x)	((x) << S_FW_SCSI_READ_WR_LEN16)
+#define G_FW_SCSI_READ_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_LEN16) & M_FW_SCSI_READ_WR_LEN16)
+
+#define S_FW_SCSI_READ_WR_CP_EN		6
+#define M_FW_SCSI_READ_WR_CP_EN		0x3
+#define V_FW_SCSI_READ_WR_CP_EN(x)	((x) << S_FW_SCSI_READ_WR_CP_EN)
+#define G_FW_SCSI_READ_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_CP_EN) & M_FW_SCSI_READ_WR_CP_EN)
+
+#define S_FW_SCSI_READ_WR_CLASS		4
+#define M_FW_SCSI_READ_WR_CLASS		0x3
+#define V_FW_SCSI_READ_WR_CLASS(x)	((x) << S_FW_SCSI_READ_WR_CLASS)
+#define G_FW_SCSI_READ_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_READ_WR_CLASS) & M_FW_SCSI_READ_WR_CLASS)
+
+struct fw_scsi_cmd_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   tmo_val;
+	__u8   r3;
+	union fw_scsi_cmd_priv {
+		struct fcoe_cmd_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r4_lo[2];
+		} fcoe;
+		struct iscsi_cmd_priv {
+			__u8   r4[4];
+		} iscsi;
+	} u;
+	__u8   r5[8];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
 	__be32 r6;
-	__u8   cdb[16];
-};
-
-#define S_FW_SCSI_ISCSI_DATA_FBIT	7
-#define M_FW_SCSI_ISCSI_DATA_FBIT	0x1
-#define V_FW_SCSI_ISCSI_DATA_FBIT(x)	((x) << S_FW_SCSI_ISCSI_DATA_FBIT)
-#define G_FW_SCSI_ISCSI_DATA_FBIT(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_DATA_FBIT) & M_FW_SCSI_ISCSI_DATA_FBIT)
-#define F_FW_SCSI_ISCSI_DATA_FBIT	V_FW_SCSI_ISCSI_DATA_FBIT(1U)
-
-#define S_FW_SCSI_ISCSI_DATA_RBIT	6
-#define M_FW_SCSI_ISCSI_DATA_RBIT	0x1
-#define V_FW_SCSI_ISCSI_DATA_RBIT(x)	((x) << S_FW_SCSI_ISCSI_DATA_RBIT)
-#define G_FW_SCSI_ISCSI_DATA_RBIT(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_DATA_RBIT) & M_FW_SCSI_ISCSI_DATA_RBIT)
-#define F_FW_SCSI_ISCSI_DATA_RBIT	V_FW_SCSI_ISCSI_DATA_RBIT(1U)
-
-#define S_FW_SCSI_ISCSI_DATA_WBIT	5
-#define M_FW_SCSI_ISCSI_DATA_WBIT	0x1
-#define V_FW_SCSI_ISCSI_DATA_WBIT(x)	((x) << S_FW_SCSI_ISCSI_DATA_WBIT)
-#define G_FW_SCSI_ISCSI_DATA_WBIT(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_DATA_WBIT) & M_FW_SCSI_ISCSI_DATA_WBIT)
-#define F_FW_SCSI_ISCSI_DATA_WBIT	V_FW_SCSI_ISCSI_DATA_WBIT(1U)
-
-#define S_FW_SCSI_ISCSI_DATA_TATTR	0
-#define M_FW_SCSI_ISCSI_DATA_TATTR	0x7
-#define V_FW_SCSI_ISCSI_DATA_TATTR(x)	((x) << S_FW_SCSI_ISCSI_DATA_TATTR)
-#define G_FW_SCSI_ISCSI_DATA_TATTR(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_DATA_TATTR) & M_FW_SCSI_ISCSI_DATA_TATTR)
-
-#define FW_SCSI_ISCSI_DATA_TATTR_UNTAGGED	0
-#define FW_SCSI_ISCSI_DATA_TATTR_SIMPLE		1
-#define	FW_SCSI_ISCSI_DATA_TATTR_ORDERED	2
-#define FW_SCSI_ISCSI_DATA_TATTR_HEADOQ		3
-#define FW_SCSI_ISCSI_DATA_TATTR_ACA		4
-
-#define FW_SCSI_ISCSI_TMF_OP			0x02
-#define FW_SCSI_ISCSI_ABORT_FUNC		0x01
-#define FW_SCSI_ISCSI_LUN_RESET_FUNC		0x05
-#define FW_SCSI_ISCSI_RESERVED_TAG		0xffffffff
-
-struct fw_scsi_iscsi_rsp {
-	__u8   r0;
-	__u8   sbit_to_uflow;
-	__u8   response;
-	__u8   status;
+};
+
+#define S_FW_SCSI_CMD_WR_OPCODE		24
+#define M_FW_SCSI_CMD_WR_OPCODE		0xff
+#define V_FW_SCSI_CMD_WR_OPCODE(x)	((x) << S_FW_SCSI_CMD_WR_OPCODE)
+#define G_FW_SCSI_CMD_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_OPCODE) & M_FW_SCSI_CMD_WR_OPCODE)
+
+#define S_FW_SCSI_CMD_WR_IMMDLEN	0
+#define M_FW_SCSI_CMD_WR_IMMDLEN	0xff
+#define V_FW_SCSI_CMD_WR_IMMDLEN(x)	((x) << S_FW_SCSI_CMD_WR_IMMDLEN)
+#define G_FW_SCSI_CMD_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_IMMDLEN) & M_FW_SCSI_CMD_WR_IMMDLEN)
+
+#define S_FW_SCSI_CMD_WR_FLOWID		8
+#define M_FW_SCSI_CMD_WR_FLOWID		0xfffff
+#define V_FW_SCSI_CMD_WR_FLOWID(x)	((x) << S_FW_SCSI_CMD_WR_FLOWID)
+#define G_FW_SCSI_CMD_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_FLOWID) & M_FW_SCSI_CMD_WR_FLOWID)
+
+#define S_FW_SCSI_CMD_WR_LEN16		0
+#define M_FW_SCSI_CMD_WR_LEN16		0xff
+#define V_FW_SCSI_CMD_WR_LEN16(x)	((x) << S_FW_SCSI_CMD_WR_LEN16)
+#define G_FW_SCSI_CMD_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_LEN16) & M_FW_SCSI_CMD_WR_LEN16)
+
+#define S_FW_SCSI_CMD_WR_CP_EN		6
+#define M_FW_SCSI_CMD_WR_CP_EN		0x3
+#define V_FW_SCSI_CMD_WR_CP_EN(x)	((x) << S_FW_SCSI_CMD_WR_CP_EN)
+#define G_FW_SCSI_CMD_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_CP_EN) & M_FW_SCSI_CMD_WR_CP_EN)
+
+#define S_FW_SCSI_CMD_WR_CLASS		4
+#define M_FW_SCSI_CMD_WR_CLASS		0x3
+#define V_FW_SCSI_CMD_WR_CLASS(x)	((x) << S_FW_SCSI_CMD_WR_CLASS)
+#define G_FW_SCSI_CMD_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_CMD_WR_CLASS) & M_FW_SCSI_CMD_WR_CLASS)
+
+struct fw_scsi_abrt_cls_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   tmo_val;
+	__u8   sub_opcode_to_chk_all_io;
+	__u8   r3[4];
+	__be64 t_cookie;
+};
+
+#define S_FW_SCSI_ABRT_CLS_WR_OPCODE	24
+#define M_FW_SCSI_ABRT_CLS_WR_OPCODE	0xff
+#define V_FW_SCSI_ABRT_CLS_WR_OPCODE(x)	((x) << S_FW_SCSI_ABRT_CLS_WR_OPCODE)
+#define G_FW_SCSI_ABRT_CLS_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_OPCODE) & M_FW_SCSI_ABRT_CLS_WR_OPCODE)
+
+#define S_FW_SCSI_ABRT_CLS_WR_IMMDLEN		0
+#define M_FW_SCSI_ABRT_CLS_WR_IMMDLEN		0xff
+#define V_FW_SCSI_ABRT_CLS_WR_IMMDLEN(x)	\
+    ((x) << S_FW_SCSI_ABRT_CLS_WR_IMMDLEN)
+#define G_FW_SCSI_ABRT_CLS_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_IMMDLEN) & M_FW_SCSI_ABRT_CLS_WR_IMMDLEN)
+
+#define S_FW_SCSI_ABRT_CLS_WR_FLOWID	8
+#define M_FW_SCSI_ABRT_CLS_WR_FLOWID	0xfffff
+#define V_FW_SCSI_ABRT_CLS_WR_FLOWID(x)	((x) << S_FW_SCSI_ABRT_CLS_WR_FLOWID)
+#define G_FW_SCSI_ABRT_CLS_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_FLOWID) & M_FW_SCSI_ABRT_CLS_WR_FLOWID)
+
+#define S_FW_SCSI_ABRT_CLS_WR_LEN16	0
+#define M_FW_SCSI_ABRT_CLS_WR_LEN16	0xff
+#define V_FW_SCSI_ABRT_CLS_WR_LEN16(x)	((x) << S_FW_SCSI_ABRT_CLS_WR_LEN16)
+#define G_FW_SCSI_ABRT_CLS_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_LEN16) & M_FW_SCSI_ABRT_CLS_WR_LEN16)
+
+#define S_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE	2
+#define M_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE	0x3f
+#define V_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(x)	\
+    ((x) << S_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE)
+#define G_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE) & \
+     M_FW_SCSI_ABRT_CLS_WR_SUB_OPCODE)
+
+#define S_FW_SCSI_ABRT_CLS_WR_UNSOL	1
+#define M_FW_SCSI_ABRT_CLS_WR_UNSOL	0x1
+#define V_FW_SCSI_ABRT_CLS_WR_UNSOL(x)	((x) << S_FW_SCSI_ABRT_CLS_WR_UNSOL)
+#define G_FW_SCSI_ABRT_CLS_WR_UNSOL(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_UNSOL) & M_FW_SCSI_ABRT_CLS_WR_UNSOL)
+#define F_FW_SCSI_ABRT_CLS_WR_UNSOL	V_FW_SCSI_ABRT_CLS_WR_UNSOL(1U)
+
+#define S_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO	0
+#define M_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO	0x1
+#define V_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(x)	\
+    ((x) << S_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO)
+#define G_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(x)	\
+    (((x) >> S_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO) & \
+     M_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO)
+#define F_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO	\
+    V_FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(1U)
+
+struct fw_scsi_tgt_acc_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   r3;
+	__u8   use_burst_len;
+	union fw_scsi_tgt_acc_priv {
+		struct fcoe_tgt_acc_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r4_lo[2];
+		} fcoe;
+		struct iscsi_tgt_acc_priv {
+			__u8   r4[4];
+		} iscsi;
+	} u;
+	__be32 burst_len;
+	__be32 rel_off;
+	__be64 r5;
+	__be32 r6;
+	__be32 tot_xfer_len;
+};
+
+#define S_FW_SCSI_TGT_ACC_WR_OPCODE	24
+#define M_FW_SCSI_TGT_ACC_WR_OPCODE	0xff
+#define V_FW_SCSI_TGT_ACC_WR_OPCODE(x)	((x) << S_FW_SCSI_TGT_ACC_WR_OPCODE)
+#define G_FW_SCSI_TGT_ACC_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_OPCODE) & M_FW_SCSI_TGT_ACC_WR_OPCODE)
+
+#define S_FW_SCSI_TGT_ACC_WR_IMMDLEN	0
+#define M_FW_SCSI_TGT_ACC_WR_IMMDLEN	0xff
+#define V_FW_SCSI_TGT_ACC_WR_IMMDLEN(x)	((x) << S_FW_SCSI_TGT_ACC_WR_IMMDLEN)
+#define G_FW_SCSI_TGT_ACC_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_IMMDLEN) & M_FW_SCSI_TGT_ACC_WR_IMMDLEN)
+
+#define S_FW_SCSI_TGT_ACC_WR_FLOWID	8
+#define M_FW_SCSI_TGT_ACC_WR_FLOWID	0xfffff
+#define V_FW_SCSI_TGT_ACC_WR_FLOWID(x)	((x) << S_FW_SCSI_TGT_ACC_WR_FLOWID)
+#define G_FW_SCSI_TGT_ACC_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_FLOWID) & M_FW_SCSI_TGT_ACC_WR_FLOWID)
+
+#define S_FW_SCSI_TGT_ACC_WR_LEN16	0
+#define M_FW_SCSI_TGT_ACC_WR_LEN16	0xff
+#define V_FW_SCSI_TGT_ACC_WR_LEN16(x)	((x) << S_FW_SCSI_TGT_ACC_WR_LEN16)
+#define G_FW_SCSI_TGT_ACC_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_LEN16) & M_FW_SCSI_TGT_ACC_WR_LEN16)
+
+#define S_FW_SCSI_TGT_ACC_WR_CP_EN	6
+#define M_FW_SCSI_TGT_ACC_WR_CP_EN	0x3
+#define V_FW_SCSI_TGT_ACC_WR_CP_EN(x)	((x) << S_FW_SCSI_TGT_ACC_WR_CP_EN)
+#define G_FW_SCSI_TGT_ACC_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_CP_EN) & M_FW_SCSI_TGT_ACC_WR_CP_EN)
+
+#define S_FW_SCSI_TGT_ACC_WR_CLASS	4
+#define M_FW_SCSI_TGT_ACC_WR_CLASS	0x3
+#define V_FW_SCSI_TGT_ACC_WR_CLASS(x)	((x) << S_FW_SCSI_TGT_ACC_WR_CLASS)
+#define G_FW_SCSI_TGT_ACC_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_TGT_ACC_WR_CLASS) & M_FW_SCSI_TGT_ACC_WR_CLASS)
+
+struct fw_scsi_tgt_xmit_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   auto_rsp;
+	__u8   use_xfer_cnt;
+	union fw_scsi_tgt_xmit_priv {
+		struct fcoe_tgt_xmit_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_tgt_xmit_priv {
+			__u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
 	__be32 r4;
-	__u8   r5[32];
-	__be32 bidir_res_cnt;
-	__be32 res_cnt;
-	__u8   sense_data[128];
-};
-
-#define S_FW_SCSI_ISCSI_RSP_SBIT	7
-#define M_FW_SCSI_ISCSI_RSP_SBIT	0x1
-#define V_FW_SCSI_ISCSI_RSP_SBIT(x)	((x) << S_FW_SCSI_ISCSI_RSP_SBIT)
-#define G_FW_SCSI_ISCSI_RSP_SBIT(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_RSP_SBIT) & M_FW_SCSI_ISCSI_RSP_SBIT)
-#define F_FW_SCSI_ISCSI_RSP_SBIT	V_FW_SCSI_ISCSI_RSP_SBIT(1U)
-
-#define S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW		4
-#define M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW		0x1
-#define V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x)	\
-    ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW)
-#define G_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW) & \
-     M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW)
-#define F_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW	V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(1U)
-
-#define S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW		3
-#define M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW		0x1
-#define V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x)	\
-    ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW)
-#define G_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW) & \
-     M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW)
-#define F_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW	V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(1U)
-
-#define S_FW_SCSI_ISCSI_RSP_OFLOW	2
-#define M_FW_SCSI_ISCSI_RSP_OFLOW	0x1
-#define V_FW_SCSI_ISCSI_RSP_OFLOW(x)	((x) << S_FW_SCSI_ISCSI_RSP_OFLOW)
-#define G_FW_SCSI_ISCSI_RSP_OFLOW(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_RSP_OFLOW) & M_FW_SCSI_ISCSI_RSP_OFLOW)
-#define F_FW_SCSI_ISCSI_RSP_OFLOW	V_FW_SCSI_ISCSI_RSP_OFLOW(1U)
-
-#define S_FW_SCSI_ISCSI_RSP_UFLOW	1
-#define M_FW_SCSI_ISCSI_RSP_UFLOW	0x1
-#define V_FW_SCSI_ISCSI_RSP_UFLOW(x)	((x) << S_FW_SCSI_ISCSI_RSP_UFLOW)
-#define G_FW_SCSI_ISCSI_RSP_UFLOW(x)	\
-    (((x) >> S_FW_SCSI_ISCSI_RSP_UFLOW) & M_FW_SCSI_ISCSI_RSP_UFLOW)
-#define F_FW_SCSI_ISCSI_RSP_UFLOW	V_FW_SCSI_ISCSI_RSP_UFLOW(1U)
+	__be64 r5;
+	__be32 r6;
+	__be32 tot_xfer_len;
+};
+
+#define S_FW_SCSI_TGT_XMIT_WR_OPCODE	24
+#define M_FW_SCSI_TGT_XMIT_WR_OPCODE	0xff
+#define V_FW_SCSI_TGT_XMIT_WR_OPCODE(x)	((x) << S_FW_SCSI_TGT_XMIT_WR_OPCODE)
+#define G_FW_SCSI_TGT_XMIT_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_OPCODE) & M_FW_SCSI_TGT_XMIT_WR_OPCODE)
+
+#define S_FW_SCSI_TGT_XMIT_WR_IMMDLEN		0
+#define M_FW_SCSI_TGT_XMIT_WR_IMMDLEN		0xff
+#define V_FW_SCSI_TGT_XMIT_WR_IMMDLEN(x)	\
+    ((x) << S_FW_SCSI_TGT_XMIT_WR_IMMDLEN)
+#define G_FW_SCSI_TGT_XMIT_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_IMMDLEN) & M_FW_SCSI_TGT_XMIT_WR_IMMDLEN)
+
+#define S_FW_SCSI_TGT_XMIT_WR_FLOWID	8
+#define M_FW_SCSI_TGT_XMIT_WR_FLOWID	0xfffff
+#define V_FW_SCSI_TGT_XMIT_WR_FLOWID(x)	((x) << S_FW_SCSI_TGT_XMIT_WR_FLOWID)
+#define G_FW_SCSI_TGT_XMIT_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_FLOWID) & M_FW_SCSI_TGT_XMIT_WR_FLOWID)
+
+#define S_FW_SCSI_TGT_XMIT_WR_LEN16	0
+#define M_FW_SCSI_TGT_XMIT_WR_LEN16	0xff
+#define V_FW_SCSI_TGT_XMIT_WR_LEN16(x)	((x) << S_FW_SCSI_TGT_XMIT_WR_LEN16)
+#define G_FW_SCSI_TGT_XMIT_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_LEN16) & M_FW_SCSI_TGT_XMIT_WR_LEN16)
+
+#define S_FW_SCSI_TGT_XMIT_WR_CP_EN	6
+#define M_FW_SCSI_TGT_XMIT_WR_CP_EN	0x3
+#define V_FW_SCSI_TGT_XMIT_WR_CP_EN(x)	((x) << S_FW_SCSI_TGT_XMIT_WR_CP_EN)
+#define G_FW_SCSI_TGT_XMIT_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_CP_EN) & M_FW_SCSI_TGT_XMIT_WR_CP_EN)
+
+#define S_FW_SCSI_TGT_XMIT_WR_CLASS	4
+#define M_FW_SCSI_TGT_XMIT_WR_CLASS	0x3
+#define V_FW_SCSI_TGT_XMIT_WR_CLASS(x)	((x) << S_FW_SCSI_TGT_XMIT_WR_CLASS)
+#define G_FW_SCSI_TGT_XMIT_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_TGT_XMIT_WR_CLASS) & M_FW_SCSI_TGT_XMIT_WR_CLASS)
+
+struct fw_scsi_tgt_rsp_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	__u8   r3[2];
+	union fw_scsi_tgt_rsp_priv {
+		struct fcoe_tgt_rsp_priv {
+			__u8   ctl_pri;
+			__u8   cp_en_class;
+			__u8   r4_lo[2];
+		} fcoe;
+		struct iscsi_tgt_rsp_priv {
+			__u8   r4[4];
+		} iscsi;
+	} u;
+	__u8   r5[8];
+};
+
+#define S_FW_SCSI_TGT_RSP_WR_OPCODE	24
+#define M_FW_SCSI_TGT_RSP_WR_OPCODE	0xff
+#define V_FW_SCSI_TGT_RSP_WR_OPCODE(x)	((x) << S_FW_SCSI_TGT_RSP_WR_OPCODE)
+#define G_FW_SCSI_TGT_RSP_WR_OPCODE(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_OPCODE) & M_FW_SCSI_TGT_RSP_WR_OPCODE)
+
+#define S_FW_SCSI_TGT_RSP_WR_IMMDLEN	0
+#define M_FW_SCSI_TGT_RSP_WR_IMMDLEN	0xff
+#define V_FW_SCSI_TGT_RSP_WR_IMMDLEN(x)	((x) << S_FW_SCSI_TGT_RSP_WR_IMMDLEN)
+#define G_FW_SCSI_TGT_RSP_WR_IMMDLEN(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_IMMDLEN) & M_FW_SCSI_TGT_RSP_WR_IMMDLEN)
+
+#define S_FW_SCSI_TGT_RSP_WR_FLOWID	8
+#define M_FW_SCSI_TGT_RSP_WR_FLOWID	0xfffff
+#define V_FW_SCSI_TGT_RSP_WR_FLOWID(x)	((x) << S_FW_SCSI_TGT_RSP_WR_FLOWID)
+#define G_FW_SCSI_TGT_RSP_WR_FLOWID(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_FLOWID) & M_FW_SCSI_TGT_RSP_WR_FLOWID)
+
+#define S_FW_SCSI_TGT_RSP_WR_LEN16	0
+#define M_FW_SCSI_TGT_RSP_WR_LEN16	0xff
+#define V_FW_SCSI_TGT_RSP_WR_LEN16(x)	((x) << S_FW_SCSI_TGT_RSP_WR_LEN16)
+#define G_FW_SCSI_TGT_RSP_WR_LEN16(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_LEN16) & M_FW_SCSI_TGT_RSP_WR_LEN16)
+
+#define S_FW_SCSI_TGT_RSP_WR_CP_EN	6
+#define M_FW_SCSI_TGT_RSP_WR_CP_EN	0x3
+#define V_FW_SCSI_TGT_RSP_WR_CP_EN(x)	((x) << S_FW_SCSI_TGT_RSP_WR_CP_EN)
+#define G_FW_SCSI_TGT_RSP_WR_CP_EN(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_CP_EN) & M_FW_SCSI_TGT_RSP_WR_CP_EN)
+
+#define S_FW_SCSI_TGT_RSP_WR_CLASS	4
+#define M_FW_SCSI_TGT_RSP_WR_CLASS	0x3
+#define V_FW_SCSI_TGT_RSP_WR_CLASS(x)	((x) << S_FW_SCSI_TGT_RSP_WR_CLASS)
+#define G_FW_SCSI_TGT_RSP_WR_CLASS(x)	\
+    (((x) >> S_FW_SCSI_TGT_RSP_WR_CLASS) & M_FW_SCSI_TGT_RSP_WR_CLASS)
 
 /******************************************************************************
  *  C O M M A N D s
@@ -1808,9 +2834,15 @@
 	FW_RSS_VI_CONFIG_CMD           = 0x23,
 	FW_SCHED_CMD                   = 0x24,
 	FW_DEVLOG_CMD                  = 0x25,
-	FW_NETIF_CMD                   = 0x26,
 	FW_WATCHDOG_CMD                = 0x27,
 	FW_CLIP_CMD                    = 0x28,
+	FW_CHNET_IFACE_CMD             = 0x26,
+	FW_FCOE_RES_INFO_CMD           = 0x31,
+	FW_FCOE_LINK_CMD               = 0x32,
+	FW_FCOE_VNP_CMD                = 0x33,
+	FW_FCOE_SPARAMS_CMD            = 0x35,
+	FW_FCOE_STATS_CMD              = 0x37,
+	FW_FCOE_FCF_CMD                = 0x38,
 	FW_LASTC2E_CMD                 = 0x40,
 	FW_ERROR_CMD                   = 0x80,
 	FW_DEBUG_CMD                   = 0x81,
@@ -1896,6 +2928,7 @@
 	FW_LDST_ADDRSPC_FUNC      = 0x0028,
 	FW_LDST_ADDRSPC_FUNC_PCIE = 0x0029,
 	FW_LDST_ADDRSPC_FUNC_I2C  = 0x002A,
+	FW_LDST_ADDRSPC_LE	  = 0x0030,
 };
 
 /*
@@ -1982,6 +3015,11 @@
 			__u8   data;
 			__be32 r9;
 		} i2c;
+		struct fw_ldst_le {
+			__be16	region;
+			__be16	nval;
+			__u32	val[12];
+		} le;
 	} u;
 };
 
@@ -2291,6 +3329,7 @@
 	FW_MEMTYPE_CF_EDC1		= 0x1,
 	FW_MEMTYPE_CF_EXTMEM		= 0x2,
 	FW_MEMTYPE_CF_FLASH		= 0x4,
+	FW_MEMTYPE_CF_INTERNAL		= 0x5,
 };
 
 struct fw_caps_config_cmd {
@@ -2409,7 +3448,11 @@
 	FW_PARAMS_PARAM_PFVF_IQFLINT_START = 0x29,
 	FW_PARAMS_PARAM_PFVF_IQFLINT_END = 0x2A,
 	FW_PARAMS_PARAM_PFVF_EQ_START	= 0x2B,
-	FW_PARAMS_PARAM_PFVF_EQ_END	= 0x2C
+	FW_PARAMS_PARAM_PFVF_EQ_END	= 0x2C,
+	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D,
+	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E,
+	FW_PARAMS_PARAM_PFVF_ETHOFLD_START = 0x2F,
+	FW_PARAMS_PARAM_PFVF_ETHOFLD_END = 0x30
 };
 
 /*
@@ -2421,6 +3464,7 @@
 	FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_MNGT = 0x10,
 	FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
 	FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
+	FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13
 };
 
 /*
@@ -3954,7 +4998,6 @@
 #define FW_VI_MAC_ADD_MAC		0x3FF
 #define FW_VI_MAC_ADD_PERSIST_MAC	0x3FE
 #define FW_VI_MAC_MAC_BASED_FREE	0x3FD
-#define FW_CLS_TCAM_NUM_ENTRIES		336
 
 enum fw_vi_mac_smac {
 	FW_VI_MAC_MPS_TCAM_ENTRY,
@@ -5662,110 +6705,6 @@
     (((x) >> S_FW_DEVLOG_CMD_MEMADDR16_DEVLOG) & \
      M_FW_DEVLOG_CMD_MEMADDR16_DEVLOG)
 
-struct fw_netif_cmd {
-	__be32 op_to_ipv4gw;
-	__be32 retval_len16;
-	__be32 netifi_ifadridx;
-	__be32 portid_to_mtuval;
-	__be32 gwaddr;
-	__be32 addr;
-	__be32 nmask;
-	__be32 bcaddr;
-};
-
-#define S_FW_NETIF_CMD_ADD	20
-#define M_FW_NETIF_CMD_ADD	0x1
-#define V_FW_NETIF_CMD_ADD(x)	((x) << S_FW_NETIF_CMD_ADD)
-#define G_FW_NETIF_CMD_ADD(x)	\
-    (((x) >> S_FW_NETIF_CMD_ADD) & M_FW_NETIF_CMD_ADD)
-#define F_FW_NETIF_CMD_ADD	V_FW_NETIF_CMD_ADD(1U)
-
-#define S_FW_NETIF_CMD_LINK	19
-#define M_FW_NETIF_CMD_LINK	0x1
-#define V_FW_NETIF_CMD_LINK(x)	((x) << S_FW_NETIF_CMD_LINK)
-#define G_FW_NETIF_CMD_LINK(x)	\
-    (((x) >> S_FW_NETIF_CMD_LINK) & M_FW_NETIF_CMD_LINK)
-#define F_FW_NETIF_CMD_LINK	V_FW_NETIF_CMD_LINK(1U)
-
-#define S_FW_NETIF_CMD_VLAN	18
-#define M_FW_NETIF_CMD_VLAN	0x1
-#define V_FW_NETIF_CMD_VLAN(x)	((x) << S_FW_NETIF_CMD_VLAN)
-#define G_FW_NETIF_CMD_VLAN(x)	\
-    (((x) >> S_FW_NETIF_CMD_VLAN) & M_FW_NETIF_CMD_VLAN)
-#define F_FW_NETIF_CMD_VLAN	V_FW_NETIF_CMD_VLAN(1U)
-
-#define S_FW_NETIF_CMD_MTU	17
-#define M_FW_NETIF_CMD_MTU	0x1
-#define V_FW_NETIF_CMD_MTU(x)	((x) << S_FW_NETIF_CMD_MTU)
-#define G_FW_NETIF_CMD_MTU(x)	\
-    (((x) >> S_FW_NETIF_CMD_MTU) & M_FW_NETIF_CMD_MTU)
-#define F_FW_NETIF_CMD_MTU	V_FW_NETIF_CMD_MTU(1U)
-
-#define S_FW_NETIF_CMD_DHCP	16
-#define M_FW_NETIF_CMD_DHCP	0x1
-#define V_FW_NETIF_CMD_DHCP(x)	((x) << S_FW_NETIF_CMD_DHCP)
-#define G_FW_NETIF_CMD_DHCP(x)	\
-    (((x) >> S_FW_NETIF_CMD_DHCP) & M_FW_NETIF_CMD_DHCP)
-#define F_FW_NETIF_CMD_DHCP	V_FW_NETIF_CMD_DHCP(1U)
-
-#define S_FW_NETIF_CMD_IPV4BCADDR	15
-#define M_FW_NETIF_CMD_IPV4BCADDR	0x1
-#define V_FW_NETIF_CMD_IPV4BCADDR(x)	((x) << S_FW_NETIF_CMD_IPV4BCADDR)
-#define G_FW_NETIF_CMD_IPV4BCADDR(x)	\
-    (((x) >> S_FW_NETIF_CMD_IPV4BCADDR) & M_FW_NETIF_CMD_IPV4BCADDR)
-#define F_FW_NETIF_CMD_IPV4BCADDR	V_FW_NETIF_CMD_IPV4BCADDR(1U)
-
-#define S_FW_NETIF_CMD_IPV4NMASK	14
-#define M_FW_NETIF_CMD_IPV4NMASK	0x1
-#define V_FW_NETIF_CMD_IPV4NMASK(x)	((x) << S_FW_NETIF_CMD_IPV4NMASK)
-#define G_FW_NETIF_CMD_IPV4NMASK(x)	\
-    (((x) >> S_FW_NETIF_CMD_IPV4NMASK) & M_FW_NETIF_CMD_IPV4NMASK)
-#define F_FW_NETIF_CMD_IPV4NMASK	V_FW_NETIF_CMD_IPV4NMASK(1U)
-
-#define S_FW_NETIF_CMD_IPV4ADDR		13
-#define M_FW_NETIF_CMD_IPV4ADDR		0x1
-#define V_FW_NETIF_CMD_IPV4ADDR(x)	((x) << S_FW_NETIF_CMD_IPV4ADDR)
-#define G_FW_NETIF_CMD_IPV4ADDR(x)	\
-    (((x) >> S_FW_NETIF_CMD_IPV4ADDR) & M_FW_NETIF_CMD_IPV4ADDR)
-#define F_FW_NETIF_CMD_IPV4ADDR	V_FW_NETIF_CMD_IPV4ADDR(1U)
-
-#define S_FW_NETIF_CMD_IPV4GW		12
-#define M_FW_NETIF_CMD_IPV4GW		0x1
-#define V_FW_NETIF_CMD_IPV4GW(x)	((x) << S_FW_NETIF_CMD_IPV4GW)
-#define G_FW_NETIF_CMD_IPV4GW(x)	\
-    (((x) >> S_FW_NETIF_CMD_IPV4GW) & M_FW_NETIF_CMD_IPV4GW)
-#define F_FW_NETIF_CMD_IPV4GW	V_FW_NETIF_CMD_IPV4GW(1U)
-
-#define S_FW_NETIF_CMD_NETIFI		8
-#define M_FW_NETIF_CMD_NETIFI		0xffffff
-#define V_FW_NETIF_CMD_NETIFI(x)	((x) << S_FW_NETIF_CMD_NETIFI)
-#define G_FW_NETIF_CMD_NETIFI(x)	\
-    (((x) >> S_FW_NETIF_CMD_NETIFI) & M_FW_NETIF_CMD_NETIFI)
-
-#define S_FW_NETIF_CMD_IFADRIDX		0
-#define M_FW_NETIF_CMD_IFADRIDX		0xff
-#define V_FW_NETIF_CMD_IFADRIDX(x)	((x) << S_FW_NETIF_CMD_IFADRIDX)
-#define G_FW_NETIF_CMD_IFADRIDX(x)	\
-    (((x) >> S_FW_NETIF_CMD_IFADRIDX) & M_FW_NETIF_CMD_IFADRIDX)
-
-#define S_FW_NETIF_CMD_PORTID		28
-#define M_FW_NETIF_CMD_PORTID		0xf
-#define V_FW_NETIF_CMD_PORTID(x)	((x) << S_FW_NETIF_CMD_PORTID)
-#define G_FW_NETIF_CMD_PORTID(x)	\
-    (((x) >> S_FW_NETIF_CMD_PORTID) & M_FW_NETIF_CMD_PORTID)
-
-#define S_FW_NETIF_CMD_VLANID		16
-#define M_FW_NETIF_CMD_VLANID		0xfff
-#define V_FW_NETIF_CMD_VLANID(x)	((x) << S_FW_NETIF_CMD_VLANID)
-#define G_FW_NETIF_CMD_VLANID(x)	\
-    (((x) >> S_FW_NETIF_CMD_VLANID) & M_FW_NETIF_CMD_VLANID)
-
-#define S_FW_NETIF_CMD_MTUVAL		0
-#define M_FW_NETIF_CMD_MTUVAL		0xffff
-#define V_FW_NETIF_CMD_MTUVAL(x)	((x) << S_FW_NETIF_CMD_MTUVAL)
-#define G_FW_NETIF_CMD_MTUVAL(x)	\
-    (((x) >> S_FW_NETIF_CMD_MTUVAL) & M_FW_NETIF_CMD_MTUVAL)
-
 enum fw_watchdog_actions {
 	FW_WATCHDOG_ACTION_FLR = 0x1,
 	FW_WATCHDOG_ACTION_BYPASS = 0x2,
@@ -5802,6 +6741,421 @@
     (((x) >> S_FW_CLIP_CMD_FREE) & M_FW_CLIP_CMD_FREE)
 #define F_FW_CLIP_CMD_FREE	V_FW_CLIP_CMD_FREE(1U)
 
+/******************************************************************************
+ *   F O i S C S I   C O M M A N D s
+ **************************************/
+
+#define	FW_CHNET_IFACE_ADDR_MAX	3
+
+enum fw_chnet_iface_cmd_subop {
+	FW_CHNET_IFACE_CMD_SUBOP_NOOP = 0,
+	
+	FW_CHNET_IFACE_CMD_SUBOP_LINK_UP,
+	FW_CHNET_IFACE_CMD_SUBOP_LINK_DOWN,
+	
+	FW_CHNET_IFACE_CMD_SUBOP_MTU_SET,
+	FW_CHNET_IFACE_CMD_SUBOP_MTU_GET,
+
+	FW_CHNET_IFACE_CMD_SUBOP_MAX,
+};
+
+struct fw_chnet_iface_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	__u8   subop;
+	__u8   r2[3];
+	__be32 ifid_ifstate;
+	__be16 mtu;
+	__be16 vlanid;
+	__be32 r3;
+	__be16 r4;
+	__u8   mac[6];
+};
+
+#define S_FW_CHNET_IFACE_CMD_PORTID	0
+#define M_FW_CHNET_IFACE_CMD_PORTID	0xf
+#define V_FW_CHNET_IFACE_CMD_PORTID(x)	((x) << S_FW_CHNET_IFACE_CMD_PORTID)
+#define G_FW_CHNET_IFACE_CMD_PORTID(x)	\
+    (((x) >> S_FW_CHNET_IFACE_CMD_PORTID) & M_FW_CHNET_IFACE_CMD_PORTID)
+
+#define S_FW_CHNET_IFACE_CMD_IFID	8
+#define M_FW_CHNET_IFACE_CMD_IFID	0xffffff
+#define V_FW_CHNET_IFACE_CMD_IFID(x)	((x) << S_FW_CHNET_IFACE_CMD_IFID)
+#define G_FW_CHNET_IFACE_CMD_IFID(x)	\
+    (((x) >> S_FW_CHNET_IFACE_CMD_IFID) & M_FW_CHNET_IFACE_CMD_IFID)
+
+#define S_FW_CHNET_IFACE_CMD_IFSTATE	0
+#define M_FW_CHNET_IFACE_CMD_IFSTATE	0xff
+#define V_FW_CHNET_IFACE_CMD_IFSTATE(x)	((x) << S_FW_CHNET_IFACE_CMD_IFSTATE)
+#define G_FW_CHNET_IFACE_CMD_IFSTATE(x)	\
+    (((x) >> S_FW_CHNET_IFACE_CMD_IFSTATE) & M_FW_CHNET_IFACE_CMD_IFSTATE)
+
+/******************************************************************************
+ *   F O F C O E   C O M M A N D s
+ ************************************/
+
+struct fw_fcoe_res_info_cmd {
+	__be32 op_to_read;
+	__be32 retval_len16;
+	__be16 e_d_tov;
+	__be16 r_a_tov_seq;
+	__be16 r_a_tov_els;
+	__be16 r_r_tov;
+	__be32 max_xchgs;
+	__be32 max_ssns;
+	__be32 used_xchgs;
+	__be32 used_ssns;
+	__be32 max_fcfs;
+	__be32 max_vnps;
+	__be32 used_fcfs;
+	__be32 used_vnps;
+};
+
+struct fw_fcoe_link_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	__be32 sub_opcode_fcfi;
+	__u8   r3;
+	__u8   lstatus;
+	__be16 flags;
+	__u8   r4;
+	__u8   set_vlan;
+	__be16 vlan_id;
+	__be32 vnpi_pkd;
+	__be16 r6;
+	__u8   phy_mac[6];
+	__u8   vnport_wwnn[8];
+	__u8   vnport_wwpn[8];
+};
+
+#define S_FW_FCOE_LINK_CMD_PORTID	0
+#define M_FW_FCOE_LINK_CMD_PORTID	0xf
+#define V_FW_FCOE_LINK_CMD_PORTID(x)	((x) << S_FW_FCOE_LINK_CMD_PORTID)
+#define G_FW_FCOE_LINK_CMD_PORTID(x)	\
+    (((x) >> S_FW_FCOE_LINK_CMD_PORTID) & M_FW_FCOE_LINK_CMD_PORTID)
+
+#define S_FW_FCOE_LINK_CMD_SUB_OPCODE		24
+#define M_FW_FCOE_LINK_CMD_SUB_OPCODE		0xff
+#define V_FW_FCOE_LINK_CMD_SUB_OPCODE(x)	\
+    ((x) << S_FW_FCOE_LINK_CMD_SUB_OPCODE)
+#define G_FW_FCOE_LINK_CMD_SUB_OPCODE(x)	\
+    (((x) >> S_FW_FCOE_LINK_CMD_SUB_OPCODE) & M_FW_FCOE_LINK_CMD_SUB_OPCODE)
+
+#define S_FW_FCOE_LINK_CMD_FCFI		0
+#define M_FW_FCOE_LINK_CMD_FCFI		0xffffff
+#define V_FW_FCOE_LINK_CMD_FCFI(x)	((x) << S_FW_FCOE_LINK_CMD_FCFI)
+#define G_FW_FCOE_LINK_CMD_FCFI(x)	\
+    (((x) >> S_FW_FCOE_LINK_CMD_FCFI) & M_FW_FCOE_LINK_CMD_FCFI)
+
+#define S_FW_FCOE_LINK_CMD_VNPI		0
+#define M_FW_FCOE_LINK_CMD_VNPI		0xfffff
+#define V_FW_FCOE_LINK_CMD_VNPI(x)	((x) << S_FW_FCOE_LINK_CMD_VNPI)
+#define G_FW_FCOE_LINK_CMD_VNPI(x)	\
+    (((x) >> S_FW_FCOE_LINK_CMD_VNPI) & M_FW_FCOE_LINK_CMD_VNPI)
+
+struct fw_fcoe_vnp_cmd {
+	__be32 op_to_fcfi;
+	__be32 alloc_to_len16;
+	__be32 gen_wwn_to_vnpi;
+	__be32 vf_id;
+	__be16 iqid;
+	__u8   vnport_mac[6];
+	__u8   vnport_wwnn[8];
+	__u8   vnport_wwpn[8];
+	__u8   cmn_srv_parms[16];
+	__u8   clsp_word_0_1[8];
+};
+
+#define S_FW_FCOE_VNP_CMD_FCFI		0
+#define M_FW_FCOE_VNP_CMD_FCFI		0xfffff
+#define V_FW_FCOE_VNP_CMD_FCFI(x)	((x) << S_FW_FCOE_VNP_CMD_FCFI)
+#define G_FW_FCOE_VNP_CMD_FCFI(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_FCFI) & M_FW_FCOE_VNP_CMD_FCFI)
+
+#define S_FW_FCOE_VNP_CMD_ALLOC		31
+#define M_FW_FCOE_VNP_CMD_ALLOC		0x1
+#define V_FW_FCOE_VNP_CMD_ALLOC(x)	((x) << S_FW_FCOE_VNP_CMD_ALLOC)
+#define G_FW_FCOE_VNP_CMD_ALLOC(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_ALLOC) & M_FW_FCOE_VNP_CMD_ALLOC)
+#define F_FW_FCOE_VNP_CMD_ALLOC	V_FW_FCOE_VNP_CMD_ALLOC(1U)
+
+#define S_FW_FCOE_VNP_CMD_FREE		30
+#define M_FW_FCOE_VNP_CMD_FREE		0x1
+#define V_FW_FCOE_VNP_CMD_FREE(x)	((x) << S_FW_FCOE_VNP_CMD_FREE)
+#define G_FW_FCOE_VNP_CMD_FREE(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_FREE) & M_FW_FCOE_VNP_CMD_FREE)
+#define F_FW_FCOE_VNP_CMD_FREE	V_FW_FCOE_VNP_CMD_FREE(1U)
+
+#define S_FW_FCOE_VNP_CMD_MODIFY	29
+#define M_FW_FCOE_VNP_CMD_MODIFY	0x1
+#define V_FW_FCOE_VNP_CMD_MODIFY(x)	((x) << S_FW_FCOE_VNP_CMD_MODIFY)
+#define G_FW_FCOE_VNP_CMD_MODIFY(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_MODIFY) & M_FW_FCOE_VNP_CMD_MODIFY)
+#define F_FW_FCOE_VNP_CMD_MODIFY	V_FW_FCOE_VNP_CMD_MODIFY(1U)
+
+#define S_FW_FCOE_VNP_CMD_GEN_WWN	22
+#define M_FW_FCOE_VNP_CMD_GEN_WWN	0x1
+#define V_FW_FCOE_VNP_CMD_GEN_WWN(x)	((x) << S_FW_FCOE_VNP_CMD_GEN_WWN)
+#define G_FW_FCOE_VNP_CMD_GEN_WWN(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_GEN_WWN) & M_FW_FCOE_VNP_CMD_GEN_WWN)
+#define F_FW_FCOE_VNP_CMD_GEN_WWN	V_FW_FCOE_VNP_CMD_GEN_WWN(1U)
+
+#define S_FW_FCOE_VNP_CMD_PERSIST	21
+#define M_FW_FCOE_VNP_CMD_PERSIST	0x1
+#define V_FW_FCOE_VNP_CMD_PERSIST(x)	((x) << S_FW_FCOE_VNP_CMD_PERSIST)
+#define G_FW_FCOE_VNP_CMD_PERSIST(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_PERSIST) & M_FW_FCOE_VNP_CMD_PERSIST)
+#define F_FW_FCOE_VNP_CMD_PERSIST	V_FW_FCOE_VNP_CMD_PERSIST(1U)
+
+#define S_FW_FCOE_VNP_CMD_VFID_EN	20
+#define M_FW_FCOE_VNP_CMD_VFID_EN	0x1
+#define V_FW_FCOE_VNP_CMD_VFID_EN(x)	((x) << S_FW_FCOE_VNP_CMD_VFID_EN)
+#define G_FW_FCOE_VNP_CMD_VFID_EN(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_VFID_EN) & M_FW_FCOE_VNP_CMD_VFID_EN)
+#define F_FW_FCOE_VNP_CMD_VFID_EN	V_FW_FCOE_VNP_CMD_VFID_EN(1U)
+
+#define S_FW_FCOE_VNP_CMD_VNPI		0
+#define M_FW_FCOE_VNP_CMD_VNPI		0xfffff
+#define V_FW_FCOE_VNP_CMD_VNPI(x)	((x) << S_FW_FCOE_VNP_CMD_VNPI)
+#define G_FW_FCOE_VNP_CMD_VNPI(x)	\
+    (((x) >> S_FW_FCOE_VNP_CMD_VNPI) & M_FW_FCOE_VNP_CMD_VNPI)
+
+struct fw_fcoe_sparams_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	__u8   r3[7];
+	__u8   cos;
+	__u8   lport_wwnn[8];
+	__u8   lport_wwpn[8];
+	__u8   cmn_srv_parms[16];
+	__u8   cls_srv_parms[16];
+};
+
+#define S_FW_FCOE_SPARAMS_CMD_PORTID	0
+#define M_FW_FCOE_SPARAMS_CMD_PORTID	0xf
+#define V_FW_FCOE_SPARAMS_CMD_PORTID(x)	((x) << S_FW_FCOE_SPARAMS_CMD_PORTID)
+#define G_FW_FCOE_SPARAMS_CMD_PORTID(x)	\
+    (((x) >> S_FW_FCOE_SPARAMS_CMD_PORTID) & M_FW_FCOE_SPARAMS_CMD_PORTID)
+
+struct fw_fcoe_stats_cmd {
+	__be32 op_to_flowid;
+	__be32 free_to_len16;
+	union fw_fcoe_stats {
+		struct fw_fcoe_stats_ctl {
+			__u8   nstats_port;
+			__u8   port_valid_ix;
+			__be16 r6;
+			__be32 r7;
+			__be64 stat0;
+			__be64 stat1;
+			__be64 stat2;
+			__be64 stat3;
+			__be64 stat4;
+			__be64 stat5;
+		} ctl;
+		struct fw_fcoe_port_stats {
+			__be64 tx_bcast_bytes;
+			__be64 tx_bcast_frames;
+			__be64 tx_mcast_bytes;
+			__be64 tx_mcast_frames;
+			__be64 tx_ucast_bytes;
+			__be64 tx_ucast_frames;
+			__be64 tx_drop_frames;
+			__be64 tx_offload_bytes;
+			__be64 tx_offload_frames;
+			__be64 rx_bcast_bytes;
+			__be64 rx_bcast_frames;
+			__be64 rx_mcast_bytes;
+			__be64 rx_mcast_frames;
+			__be64 rx_ucast_bytes;
+			__be64 rx_ucast_frames;
+			__be64 rx_err_frames;
+		} port_stats;
+		struct fw_fcoe_fcf_stats {
+			__be32 fip_tx_bytes;
+			__be32 fip_tx_fr;
+			__be64 fcf_ka;
+			__be64 mcast_adv_rcvd;
+			__be16 ucast_adv_rcvd;
+			__be16 sol_sent;
+			__be16 vlan_req;
+			__be16 vlan_rpl;
+			__be16 clr_vlink;
+			__be16 link_down;
+			__be16 link_up;
+			__be16 logo;
+			__be16 flogi_req;
+			__be16 flogi_rpl;
+			__be16 fdisc_req;
+			__be16 fdisc_rpl;
+			__be16 fka_prd_chg;
+			__be16 fc_map_chg;
+			__be16 vfid_chg;
+			__u8   no_fka_req;
+			__u8   no_vnp;
+		} fcf_stats;
+		struct fw_fcoe_pcb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 vnp_ka;
+			__be32 unsol_els_rcvd;
+			__be64 unsol_cmd_rcvd;
+			__be16 implicit_logo;
+			__be16 flogi_inv_sparm;
+			__be16 fdisc_inv_sparm;
+			__be16 flogi_rjt;
+			__be16 fdisc_rjt;
+			__be16 no_ssn;
+			__be16 mac_flt_fail;
+			__be16 inv_fr_rcvd;
+		} pcb_stats;
+		struct fw_fcoe_scb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 host_abrt_req;
+			__be32 adap_auto_abrt;
+			__be32 adap_abrt_rsp;
+			__be32 host_ios_req;
+			__be16 ssn_offl_ios;
+			__be16 ssn_not_rdy_ios;
+			__u8   rx_data_ddp_err;
+			__u8   ddp_flt_set_err;
+			__be16 rx_data_fr_err;
+			__u8   bad_st_abrt_req;
+			__u8   no_io_abrt_req;
+			__u8   abort_tmo;
+			__u8   abort_tmo_2;
+			__be32 abort_req;
+			__u8   no_ppod_res_tmo;
+			__u8   bp_tmo;
+			__u8   adap_auto_cls;
+			__u8   no_io_cls_req;
+			__be32 host_cls_req;
+			__be64 unsol_cmd_rcvd;
+			__be32 plogi_req_rcvd;
+			__be32 prli_req_rcvd;
+			__be16 logo_req_rcvd;
+			__be16 prlo_req_rcvd;
+			__be16 plogi_rjt_rcvd;
+			__be16 prli_rjt_rcvd;
+			__be32 adisc_req_rcvd;
+			__be32 rscn_rcvd;
+			__be32 rrq_req_rcvd;
+			__be32 unsol_els_rcvd;
+			__u8   adisc_rjt_rcvd;
+			__u8   scr_rjt;
+			__u8   ct_rjt;
+			__u8   inval_bls_rcvd;
+			__be32 ba_rjt_rcvd;
+		} scb_stats;
+	} u;
+};
+
+#define S_FW_FCOE_STATS_CMD_FLOWID	0
+#define M_FW_FCOE_STATS_CMD_FLOWID	0xfffff
+#define V_FW_FCOE_STATS_CMD_FLOWID(x)	((x) << S_FW_FCOE_STATS_CMD_FLOWID)
+#define G_FW_FCOE_STATS_CMD_FLOWID(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_FLOWID) & M_FW_FCOE_STATS_CMD_FLOWID)
+
+#define S_FW_FCOE_STATS_CMD_FREE	30
+#define M_FW_FCOE_STATS_CMD_FREE	0x1
+#define V_FW_FCOE_STATS_CMD_FREE(x)	((x) << S_FW_FCOE_STATS_CMD_FREE)
+#define G_FW_FCOE_STATS_CMD_FREE(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_FREE) & M_FW_FCOE_STATS_CMD_FREE)
+#define F_FW_FCOE_STATS_CMD_FREE	V_FW_FCOE_STATS_CMD_FREE(1U)
+
+#define S_FW_FCOE_STATS_CMD_NSTATS	4
+#define M_FW_FCOE_STATS_CMD_NSTATS	0x7
+#define V_FW_FCOE_STATS_CMD_NSTATS(x)	((x) << S_FW_FCOE_STATS_CMD_NSTATS)
+#define G_FW_FCOE_STATS_CMD_NSTATS(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_NSTATS) & M_FW_FCOE_STATS_CMD_NSTATS)
+
+#define S_FW_FCOE_STATS_CMD_PORT	0
+#define M_FW_FCOE_STATS_CMD_PORT	0x3
+#define V_FW_FCOE_STATS_CMD_PORT(x)	((x) << S_FW_FCOE_STATS_CMD_PORT)
+#define G_FW_FCOE_STATS_CMD_PORT(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_PORT) & M_FW_FCOE_STATS_CMD_PORT)
+
+#define S_FW_FCOE_STATS_CMD_PORT_VALID		7
+#define M_FW_FCOE_STATS_CMD_PORT_VALID		0x1
+#define V_FW_FCOE_STATS_CMD_PORT_VALID(x)	\
+    ((x) << S_FW_FCOE_STATS_CMD_PORT_VALID)
+#define G_FW_FCOE_STATS_CMD_PORT_VALID(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_PORT_VALID) & M_FW_FCOE_STATS_CMD_PORT_VALID)
+#define F_FW_FCOE_STATS_CMD_PORT_VALID	V_FW_FCOE_STATS_CMD_PORT_VALID(1U)
+
+#define S_FW_FCOE_STATS_CMD_IX		0
+#define M_FW_FCOE_STATS_CMD_IX		0x3f
+#define V_FW_FCOE_STATS_CMD_IX(x)	((x) << S_FW_FCOE_STATS_CMD_IX)
+#define G_FW_FCOE_STATS_CMD_IX(x)	\
+    (((x) >> S_FW_FCOE_STATS_CMD_IX) & M_FW_FCOE_STATS_CMD_IX)
+
+struct fw_fcoe_fcf_cmd {
+	__be32 op_to_fcfi;
+	__be32 retval_len16;
+	__be16 priority_pkd;
+	__u8   mac[6];
+	__u8   name_id[8];
+	__u8   fabric[8];
+	__be16 vf_id;
+	__be16 max_fcoe_size;
+	__u8   vlan_id;
+	__u8   fc_map[3];
+	__be32 fka_adv;
+	__be32 r6;
+	__u8   r7_hi;
+	__u8   fpma_to_portid;
+	__u8   spma_mac[6];
+	__be64 r8;
+};
+
+#define S_FW_FCOE_FCF_CMD_FCFI		0
+#define M_FW_FCOE_FCF_CMD_FCFI		0xfffff
+#define V_FW_FCOE_FCF_CMD_FCFI(x)	((x) << S_FW_FCOE_FCF_CMD_FCFI)
+#define G_FW_FCOE_FCF_CMD_FCFI(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_FCFI) & M_FW_FCOE_FCF_CMD_FCFI)
+
+#define S_FW_FCOE_FCF_CMD_PRIORITY	0
+#define M_FW_FCOE_FCF_CMD_PRIORITY	0xff
+#define V_FW_FCOE_FCF_CMD_PRIORITY(x)	((x) << S_FW_FCOE_FCF_CMD_PRIORITY)
+#define G_FW_FCOE_FCF_CMD_PRIORITY(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_PRIORITY) & M_FW_FCOE_FCF_CMD_PRIORITY)
+
+#define S_FW_FCOE_FCF_CMD_FPMA		6
+#define M_FW_FCOE_FCF_CMD_FPMA		0x1
+#define V_FW_FCOE_FCF_CMD_FPMA(x)	((x) << S_FW_FCOE_FCF_CMD_FPMA)
+#define G_FW_FCOE_FCF_CMD_FPMA(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_FPMA) & M_FW_FCOE_FCF_CMD_FPMA)
+#define F_FW_FCOE_FCF_CMD_FPMA	V_FW_FCOE_FCF_CMD_FPMA(1U)
+
+#define S_FW_FCOE_FCF_CMD_SPMA		5
+#define M_FW_FCOE_FCF_CMD_SPMA		0x1
+#define V_FW_FCOE_FCF_CMD_SPMA(x)	((x) << S_FW_FCOE_FCF_CMD_SPMA)
+#define G_FW_FCOE_FCF_CMD_SPMA(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_SPMA) & M_FW_FCOE_FCF_CMD_SPMA)
+#define F_FW_FCOE_FCF_CMD_SPMA	V_FW_FCOE_FCF_CMD_SPMA(1U)
+
+#define S_FW_FCOE_FCF_CMD_LOGIN		4
+#define M_FW_FCOE_FCF_CMD_LOGIN		0x1
+#define V_FW_FCOE_FCF_CMD_LOGIN(x)	((x) << S_FW_FCOE_FCF_CMD_LOGIN)
+#define G_FW_FCOE_FCF_CMD_LOGIN(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_LOGIN) & M_FW_FCOE_FCF_CMD_LOGIN)
+#define F_FW_FCOE_FCF_CMD_LOGIN	V_FW_FCOE_FCF_CMD_LOGIN(1U)
+
+#define S_FW_FCOE_FCF_CMD_PORTID	0
+#define M_FW_FCOE_FCF_CMD_PORTID	0xf
+#define V_FW_FCOE_FCF_CMD_PORTID(x)	((x) << S_FW_FCOE_FCF_CMD_PORTID)
+#define G_FW_FCOE_FCF_CMD_PORTID(x)	\
+    (((x) >> S_FW_FCOE_FCF_CMD_PORTID) & M_FW_FCOE_FCF_CMD_PORTID)
+
+/******************************************************************************
+ *   E R R O R   a n d   D E B U G   C O M M A N D s
+ ******************************************************/
+
 enum fw_error_type {
 	FW_ERROR_TYPE_EXCEPTION		= 0x0,
 	FW_ERROR_TYPE_HWMODULE		= 0x1,
@@ -5911,7 +7265,6 @@
 #define G_FW_DEBUG_CMD_TYPE(x)	\
     (((x) >> S_FW_DEBUG_CMD_TYPE) & M_FW_DEBUG_CMD_TYPE)
 
-
 /******************************************************************************
  *   P C I E   F W   R E G I S T E R
  **************************************/
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/offload.h
--- a/head/sys/dev/cxgbe/offload.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/offload.h	Wed Jul 25 17:04:43 2012 +0300
@@ -24,19 +24,13 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/offload.h 228561 2011-12-16 02:09:51Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/offload.h 237263 2012-06-19 07:34:13Z np $
  *
  */
 
 #ifndef __T4_OFFLOAD_H__
 #define __T4_OFFLOAD_H__
 
-/* XXX: flagrant misuse of mbuf fields (during tx by TOM) */
-#define MBUF_EQ(m)		(*((void **)(&(m)->m_pkthdr.rcvif)))
-/* These have to work for !M_PKTHDR so we use a field from m_hdr. */
-#define MBUF_TX_CREDITS(m)	((m)->m_hdr.pad[0])
-#define MBUF_DMA_MAPPED(m)	((m)->m_hdr.pad[1])
-
 #define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
 	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
 	(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
@@ -119,7 +113,7 @@
 	struct t4_range ocq;
 };
 
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 enum {
 	ULD_TOM = 1,
 };
@@ -130,13 +124,8 @@
 	SLIST_ENTRY(uld_info) link;
 	int refcount;
 	int uld_id;
-	int (*attach)(struct adapter *, void **);
-	int (*detach)(void *);
-};
-
-struct uld_softc {
-	struct uld_info *uld;
-	void *softc;
+	int (*activate)(struct adapter *);
+	int (*deactivate)(struct adapter *);
 };
 
 struct tom_tunables {
@@ -148,6 +137,8 @@
 
 int t4_register_uld(struct uld_info *);
 int t4_unregister_uld(struct uld_info *);
+int t4_activate_uld(struct adapter *, int);
+int t4_deactivate_uld(struct adapter *, int);
 #endif
 
 #endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/osdep.h
--- a/head/sys/dev/cxgbe/osdep.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/osdep.h	Wed Jul 25 17:04:43 2012 +0300
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/osdep.h 228561 2011-12-16 02:09:51Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/osdep.h 237436 2012-06-22 07:51:15Z np $
  *
  */
 
@@ -111,6 +111,7 @@
 #define AUTONEG_DISABLE	0
 #define AUTONEG_ENABLE	1
 
+#define PCI_DEVICE_ID	PCIR_DEVICE
 #define PCI_CAP_ID_VPD  PCIY_VPD
 #define PCI_VPD_ADDR    PCIR_VPD_ADDR
 #define PCI_VPD_ADDR_F  0x8000
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/t4_l2t.c
--- a/head/sys/dev/cxgbe/t4_l2t.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/t4_l2t.c	Wed Jul 25 17:04:43 2012 +0300
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2011 Chelsio Communications, Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,9 +24,10 @@
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_l2t.c 231115 2012-02-07 06:21:59Z np $");
+__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_l2t.c 237819 2012-06-29 19:51:06Z np $");
 
 #include "opt_inet.h"
+#include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -38,16 +39,7 @@
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/sbuf.h>
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/ethernet.h>
-#include <net/if_vlan_var.h>
-#include <net/if_dl.h>
-#include <net/if_llatbl.h>
-#include <net/route.h>
 #include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/if_ether.h>
 
 #include "common/common.h"
 #include "common/jhash.h"
@@ -72,42 +64,11 @@
  * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
  */
 
-/* identifies sync vs async L2T_WRITE_REQs */
-#define S_SYNC_WR    12
-#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
-#define F_SYNC_WR    V_SYNC_WR(1)
-
-enum {
-	L2T_STATE_VALID,	/* entry is up to date */
-	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
-	L2T_STATE_RESOLVING,	/* entry needs address resolution */
-	L2T_STATE_SYNC_WRITE,	/* synchronous write of entry underway */
-
-	/* when state is one of the below the entry is not hashed */
-	L2T_STATE_SWITCHING,	/* entry is being used by a switching filter */
-	L2T_STATE_UNUSED	/* entry not in use */
-};
-
-struct l2t_data {
-	struct rwlock lock;
-	volatile int nfree;	/* number of free entries */
-	struct l2t_entry *rover;/* starting point for next allocation */
-	struct l2t_entry l2tab[L2T_SIZE];
-};
-
-static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *,
-    struct mbuf *);
-
-#define VLAN_NONE	0xfff
-#define SA(x)           ((struct sockaddr *)(x))
-#define SIN(x)          ((struct sockaddr_in *)(x))
-#define SINADDR(x)      (SIN(x)->sin_addr.s_addr)
-
 /*
  * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
  */
-static struct l2t_entry *
-alloc_l2e(struct l2t_data *d)
+struct l2t_entry *
+t4_alloc_l2e(struct l2t_data *d)
 {
 	struct l2t_entry *end, *e, **p;
 
@@ -121,7 +82,8 @@
 		if (atomic_load_acq_int(&e->refcnt) == 0)
 			goto found;
 
-	for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ;
+	for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
+		continue;
 found:
 	d->rover = e + 1;
 	atomic_subtract_int(&d->nfree, 1);
@@ -148,19 +110,18 @@
  * Write an L2T entry.  Must be called with the entry locked.
  * The write may be synchronous or asynchronous.
  */
-static int
-write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
+int
+t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
 {
-	struct mbuf *m;
+	struct wrqe *wr;
 	struct cpl_l2t_write_req *req;
 
 	mtx_assert(&e->lock, MA_OWNED);
 
-	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+	wr = alloc_wrqe(sizeof(*req), &sc->sge.mgmtq);
+	if (wr == NULL)
 		return (ENOMEM);
-
-	req = mtod(m, struct cpl_l2t_write_req *);
-	m->m_pkthdr.len = m->m_len = sizeof(*req);
+	req = wrtod(wr);
 
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
@@ -170,7 +131,7 @@
 	req->vlan = htons(e->vlan);
 	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
 
-	t4_mgmt_tx(sc, m);
+	t4_wrq_tx(sc, wr);
 
 	if (sync && e->state != L2T_STATE_SWITCHING)
 		e->state = L2T_STATE_SYNC_WRITE;
@@ -189,7 +150,7 @@
 	struct l2t_entry *e;
 
 	rw_rlock(&d->lock);
-	e = alloc_l2e(d);
+	e = t4_alloc_l2e(d);
 	if (e) {
 		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
 		e->state = L2T_STATE_SWITCHING;
@@ -214,7 +175,7 @@
 	e->lport = port;
 	memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
 	mtx_lock(&e->lock);
-	rc = write_l2e(sc, e, 0);
+	rc = t4_write_l2e(sc, e, 0);
 	mtx_unlock(&e->lock);
 	return (rc);
 }
@@ -234,10 +195,13 @@
 	rw_init(&d->lock, "L2T");
 
 	for (i = 0; i < L2T_SIZE; i++) {
-		d->l2tab[i].idx = i;
-		d->l2tab[i].state = L2T_STATE_UNUSED;
-		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
-		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
+		struct l2t_entry *e = &d->l2tab[i];
+
+		e->idx = i;
+		e->state = L2T_STATE_UNUSED;
+		mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF);
+		STAILQ_INIT(&e->wr_list);
+		atomic_store_rel_int(&e->refcnt, 0);
 	}
 
 	sc->l2t = d;
@@ -259,6 +223,24 @@
 	return (0);
 }
 
+int
+do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
+    struct mbuf *m)
+{
+	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
+	unsigned int tid = GET_TID(rpl);
+	unsigned int idx = tid & (L2T_SIZE - 1);
+
+	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
+		log(LOG_ERR,
+		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+		    rpl->status, idx);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
 #ifdef SBUF_DRAIN
 static inline unsigned int
 vlan_prio(const struct l2t_entry *e)
@@ -273,7 +255,7 @@
 	case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
 	case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
 	case L2T_STATE_SYNC_WRITE: return 'W';
-	case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R';
+	case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A';
 	case L2T_STATE_SWITCHING: return 'X';
 	default: return 'U';
 	}
@@ -311,20 +293,20 @@
 			    "Ethernet address  VLAN/P LP State Users Port");
 			header = 1;
 		}
-		if (e->state == L2T_STATE_SWITCHING || e->v6)
+		if (e->state == L2T_STATE_SWITCHING)
 			ip[0] = 0;
 		else
 			snprintf(ip, sizeof(ip), "%s",
-			    inet_ntoa(*(struct in_addr *)&e->addr[0]));
+			    inet_ntoa(*(struct in_addr *)&e->addr));
 
-		/* XXX: accessing lle probably not safe? */
+		/* XXX: e->ifp may not be around */
 		sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
 			   " %u %2u   %c   %5u %s",
 			   e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
 			   e->dmac[3], e->dmac[4], e->dmac[5],
 			   e->vlan & 0xfff, vlan_prio(e), e->lport,
 			   l2e_state(e), atomic_load_acq_int(&e->refcnt),
-			   e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : "");
+			   e->ifp->if_xname);
 skip:
 		mtx_unlock(&e->lock);
 	}
@@ -335,459 +317,3 @@
 	return (rc);
 }
 #endif
-
-#ifndef TCP_OFFLOAD_DISABLE
-static inline void
-l2t_hold(struct l2t_data *d, struct l2t_entry *e)
-{
-	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
-		atomic_subtract_int(&d->nfree, 1);
-}
-
-/*
- * To avoid having to check address families we do not allow v4 and v6
- * neighbors to be on the same hash chain.  We keep v4 entries in the first
- * half of available hash buckets and v6 in the second.
- */
-enum {
-	L2T_SZ_HALF = L2T_SIZE / 2,
-	L2T_HASH_MASK = L2T_SZ_HALF - 1
-};
-
-static inline unsigned int
-arp_hash(const uint32_t *key, int ifindex)
-{
-	return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
-}
-
-static inline unsigned int
-ipv6_hash(const uint32_t *key, int ifindex)
-{
-	uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3];
-
-	return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
-}
-
-static inline unsigned int
-addr_hash(const uint32_t *addr, int addr_len, int ifindex)
-{
-	return addr_len == 4 ? arp_hash(addr, ifindex) :
-			       ipv6_hash(addr, ifindex);
-}
-
-/*
- * Checks if an L2T entry is for the given IP/IPv6 address.  It does not check
- * whether the L2T entry and the address are of the same address family.
- * Callers ensure an address is only checked against L2T entries of the same
- * family, something made trivial by the separation of IP and IPv6 hash chains
- * mentioned above.  Returns 0 if there's a match,
- */
-static inline int
-addreq(const struct l2t_entry *e, const uint32_t *addr)
-{
-	if (e->v6)
-		return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
-		       (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]);
-	return e->addr[0] ^ addr[0];
-}
-
-/*
- * Add a packet to an L2T entry's queue of packets awaiting resolution.
- * Must be called with the entry's lock held.
- */
-static inline void
-arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
-{
-	mtx_assert(&e->lock, MA_OWNED);
-
-	KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__));
-	if (e->arpq_head)
-		e->arpq_tail->m_nextpkt = m;
-	else
-		e->arpq_head = m;
-	e->arpq_tail = m;
-}
-
-static inline void
-send_pending(struct adapter *sc, struct l2t_entry *e)
-{
-	struct mbuf *m, *next;
-
-	mtx_assert(&e->lock, MA_OWNED);
-
-	for (m = e->arpq_head; m; m = next) {
-		next = m->m_nextpkt;
-		m->m_nextpkt = NULL;
-		t4_wrq_tx(sc, MBUF_EQ(m), m);
-	}
-	e->arpq_head = e->arpq_tail = NULL;
-}
-
-#ifdef INET
-/*
- * Looks up and fills up an l2t_entry's lle.  We grab all the locks that we need
- * ourself, and update e->state at the end if e->lle was successfully filled.
- *
- * The lle passed in comes from arpresolve and is ignored as it does not appear
- * to be of much use.
- */
-static int
-l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused)
-{
-        int rc = 0;
-        struct sockaddr_in sin;
-        struct ifnet *ifp = e->ifp;
-        struct llentry *lle;
-
-        bzero(&sin, sizeof(struct sockaddr_in));
-	if (e->v6)
-		panic("%s: IPv6 L2 resolution not supported yet.", __func__);
-
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof(struct sockaddr_in);
-	memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in));
-
-        mtx_assert(&e->lock, MA_NOTOWNED);
-        KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__));
-
-        IF_AFDATA_LOCK(ifp);
-        lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin));
-        IF_AFDATA_UNLOCK(ifp);
-        if (!LLE_IS_VALID(lle))
-                return (ENOMEM);
-        if (!(lle->la_flags & LLE_VALID)) {
-                rc = EINVAL;
-                goto done;
-        }
-
-        LLE_ADDREF(lle);
-
-        mtx_lock(&e->lock);
-        if (e->state == L2T_STATE_RESOLVING) {
-                KASSERT(e->lle == NULL, ("%s: lle already valid", __func__));
-                e->lle = lle;
-                memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
-		write_l2e(sc, e, 1);
-        } else {
-                KASSERT(e->lle == lle, ("%s: lle changed", __func__));
-                LLE_REMREF(lle);
-        }
-        mtx_unlock(&e->lock);
-done:
-        LLE_WUNLOCK(lle);
-        return (rc);
-}
-#endif
-
-int
-t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
-{
-#ifndef INET
-	return (EINVAL);
-#else
-	struct llentry *lle = NULL;
-	struct sockaddr_in sin;
-	struct ifnet *ifp = e->ifp;
-
-	if (e->v6)
-		panic("%s: IPv6 L2 resolution not supported yet.", __func__);
-
-        bzero(&sin, sizeof(struct sockaddr_in));
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof(struct sockaddr_in);
-	memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in));
-
-again:
-	switch (e->state) {
-	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
-		if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
-			l2t_fill_lle(sc, e, lle);
-
-		/* Fall through */
-
-	case L2T_STATE_VALID:     /* fast-path, send the packet on */
-		return t4_wrq_tx(sc, MBUF_EQ(m), m);
-
-	case L2T_STATE_RESOLVING:
-	case L2T_STATE_SYNC_WRITE:
-		mtx_lock(&e->lock);
-		if (e->state != L2T_STATE_SYNC_WRITE &&
-		    e->state != L2T_STATE_RESOLVING) {
-			/* state changed by the time we got here */
-			mtx_unlock(&e->lock);
-			goto again;
-		}
-		arpq_enqueue(e, m);
-		mtx_unlock(&e->lock);
-
-		if (e->state == L2T_STATE_RESOLVING &&
-		    arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
-			l2t_fill_lle(sc, e, lle);
-	}
-
-	return (0);
-#endif
-}
-
-/*
- * Called when an L2T entry has no more users.  The entry is left in the hash
- * table since it is likely to be reused but we also bump nfree to indicate
- * that the entry can be reallocated for a different neighbor.  We also drop
- * the existing neighbor reference in case the neighbor is going away and is
- * waiting on our reference.
- *
- * Because entries can be reallocated to other neighbors once their ref count
- * drops to 0 we need to take the entry's lock to avoid races with a new
- * incarnation.
- */
-static void
-t4_l2e_free(struct l2t_entry *e)
-{
-	struct llentry *lle = NULL;
-	struct l2t_data *d;
-
-	mtx_lock(&e->lock);
-	if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
-		lle = e->lle;
-		e->lle = NULL;
-		/*
-		 * Don't need to worry about the arpq, an L2T entry can't be
-		 * released if any packets are waiting for resolution as we
-		 * need to be able to communicate with the device to close a
-		 * connection.
-		 */
-	}
-	mtx_unlock(&e->lock);
-
-	d = container_of(e, struct l2t_data, l2tab[e->idx]);
-	atomic_add_int(&d->nfree, 1);
-
-	if (lle)
-		LLE_FREE(lle);
-}
-
-void
-t4_l2t_release(struct l2t_entry *e)
-{
-	if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
-		t4_l2e_free(e);
-}
-
-static int
-do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
-    struct mbuf *m)
-{
-	struct adapter *sc = iq->adapter;
-	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
-	unsigned int tid = GET_TID(rpl);
-	unsigned int idx = tid & (L2T_SIZE - 1);
-
-	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
-		log(LOG_ERR,
-		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
-		    rpl->status, idx);
-		return (EINVAL);
-	}
-
-	if (tid & F_SYNC_WR) {
-		struct l2t_entry *e = &sc->l2t->l2tab[idx];
-
-		mtx_lock(&e->lock);
-		if (e->state != L2T_STATE_SWITCHING) {
-			send_pending(sc, e);
-			e->state = L2T_STATE_VALID;
-		}
-		mtx_unlock(&e->lock);
-	}
-
-	return (0);
-}
-
-/*
- * Reuse an L2T entry that was previously used for the same next hop.
- */
-static void
-reuse_entry(struct l2t_entry *e)
-{
-	struct llentry *lle;
-
-	mtx_lock(&e->lock);                /* avoid race with t4_l2t_free */
-	lle = e->lle;
-	if (lle) {
-		KASSERT(lle->la_flags & LLE_VALID,
-			("%s: invalid lle stored in l2t_entry", __func__));
-
-		if (lle->la_expire >= time_uptime)
-			e->state = L2T_STATE_STALE;
-		else
-			e->state = L2T_STATE_VALID;
-	} else
-		e->state = L2T_STATE_RESOLVING;
-	mtx_unlock(&e->lock);
-}
-
-/*
- * The TOE wants an L2 table entry that it can use to reach the next hop over
- * the specified port.  Produce such an entry - create one if needed.
- *
- * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
- * top of the real cxgbe interface.
- */
-struct l2t_entry *
-t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
-{
-	struct l2t_entry *e;
-	struct l2t_data *d = pi->adapter->l2t;
-	int addr_len;
-	uint32_t *addr;
-	int hash;
-	struct sockaddr_in6 *sin6;
-	unsigned int smt_idx = pi->port_id;
-
-	if (sa->sa_family == AF_INET) {
-		addr = (uint32_t *)&SINADDR(sa);
-		addr_len = sizeof(SINADDR(sa));
-	} else if (sa->sa_family == AF_INET6) {
-		sin6 = (struct sockaddr_in6 *)sa;
-		addr = (uint32_t *)&sin6->sin6_addr.s6_addr;
-		addr_len = sizeof(sin6->sin6_addr.s6_addr);
-	} else
-		return (NULL);
-
-#ifndef VLAN_TAG
-	if (ifp->if_type == IFT_L2VLAN)
-		return (NULL);
-#endif
-
-	hash = addr_hash(addr, addr_len, ifp->if_index);
-
-	rw_wlock(&d->lock);
-	for (e = d->l2tab[hash].first; e; e = e->next) {
-		if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){
-			l2t_hold(d, e);
-			if (atomic_load_acq_int(&e->refcnt) == 1)
-				reuse_entry(e);
-			goto done;
-		}
-	}
-
-	/* Need to allocate a new entry */
-	e = alloc_l2e(d);
-	if (e) {
-		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
-		e->state = L2T_STATE_RESOLVING;
-		memcpy(e->addr, addr, addr_len);
-		e->ifindex = ifp->if_index;
-		e->smt_idx = smt_idx;
-		e->ifp = ifp;
-		e->hash = hash;
-		e->lport = pi->lport;
-		e->v6 = (addr_len == 16);
-		e->lle = NULL;
-		atomic_store_rel_int(&e->refcnt, 1);
-#ifdef VLAN_TAG
-		if (ifp->if_type == IFT_L2VLAN)
-			VLAN_TAG(ifp, &e->vlan);
-		else
-			e->vlan = VLAN_NONE;
-#endif
-		e->next = d->l2tab[hash].first;
-		d->l2tab[hash].first = e;
-		mtx_unlock(&e->lock);
-	}
-done:
-	rw_wunlock(&d->lock);
-	return e;
-}
-
-/*
- * Called when the host's neighbor layer makes a change to some entry that is
- * loaded into the HW L2 table.
- */
-void
-t4_l2t_update(struct adapter *sc, struct llentry *lle)
-{
-	struct l2t_entry *e;
-	struct l2t_data *d = sc->l2t;
-	struct sockaddr *sa = L3_ADDR(lle);
-	struct llentry *old_lle = NULL;
-	uint32_t *addr = (uint32_t *)&SINADDR(sa);
-	struct ifnet *ifp = lle->lle_tbl->llt_ifp;
-	int hash = addr_hash(addr, sizeof(*addr), ifp->if_index);
-
-	KASSERT(d != NULL, ("%s: no L2 table", __func__));
-	LLE_WLOCK_ASSERT(lle);
-	KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED,
-	    ("%s: entry neither valid nor deleted.", __func__));
-
-	rw_rlock(&d->lock);
-	for (e = d->l2tab[hash].first; e; e = e->next) {
-		if (!addreq(e, addr) && e->ifp == ifp) {
-			mtx_lock(&e->lock);
-			if (atomic_load_acq_int(&e->refcnt))
-				goto found;
-			e->state = L2T_STATE_STALE;
-			mtx_unlock(&e->lock);
-			break;
-		}
-	}
-	rw_runlock(&d->lock);
-
-	/* The TOE has no interest in this LLE */
-	return;
-
- found:
-	rw_runlock(&d->lock);
-
-        if (atomic_load_acq_int(&e->refcnt)) {
-
-                /* Entry is referenced by at least 1 offloaded connection. */
-
-                /* Handle deletes first */
-                if (lle->la_flags & LLE_DELETED) {
-                        if (lle == e->lle) {
-                                e->lle = NULL;
-                                e->state = L2T_STATE_RESOLVING;
-                                LLE_REMREF(lle);
-                        }
-                        goto done;
-                }
-
-                if (lle != e->lle) {
-                        old_lle = e->lle;
-                        LLE_ADDREF(lle);
-                        e->lle = lle;
-                }
-
-                if (e->state == L2T_STATE_RESOLVING ||
-                    memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) {
-
-                        /* unresolved -> resolved; or dmac changed */
-
-                        memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
-			write_l2e(sc, e, 1);
-                } else {
-
-                        /* +ve reinforcement of a valid or stale entry */
-
-                }
-
-                e->state = L2T_STATE_VALID;
-
-        } else {
-                /*
-                 * Entry was used previously but is unreferenced right now.
-                 * e->lle has been released and NULL'd out by t4_l2t_free, or
-                 * l2t_release is about to call t4_l2t_free and do that.
-                 *
-                 * Either way this is of no interest to us.
-                 */
-        }
-
-done:
-        mtx_unlock(&e->lock);
-        if (old_lle)
-                LLE_FREE(old_lle);
-}
-
-#endif
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/t4_l2t.h
--- a/head/sys/dev/cxgbe/t4_l2t.h	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/t4_l2t.h	Wed Jul 25 17:04:43 2012 +0300
@@ -23,15 +23,32 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/sys/dev/cxgbe/t4_l2t.h 231115 2012-02-07 06:21:59Z np $
+ * $FreeBSD: head/sys/dev/cxgbe/t4_l2t.h 237263 2012-06-19 07:34:13Z np $
  *
  */
 
 #ifndef __T4_L2T_H
 #define __T4_L2T_H
 
+/* identifies sync vs async L2T_WRITE_REQs */
+#define S_SYNC_WR    12
+#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
+#define F_SYNC_WR    V_SYNC_WR(1)
+
 enum { L2T_SIZE = 4096 };     /* # of L2T entries */
 
+enum {
+	L2T_STATE_VALID,	/* entry is up to date */
+	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
+	L2T_STATE_RESOLVING,	/* entry needs address resolution */
+	L2T_STATE_FAILED,	/* failed to resolve */
+	L2T_STATE_SYNC_WRITE,	/* synchronous write of entry underway */
+
+	/* when state is one of the below the entry is not hashed */
+	L2T_STATE_SWITCHING,	/* entry is being used by a switching filter */
+	L2T_STATE_UNUSED	/* entry not in use */
+};
+
 /*
  * Each L2T entry plays multiple roles.  First of all, it keeps state for the
  * corresponding entry of the HW L2 table and maintains a queue of offload
@@ -43,39 +60,49 @@
 struct l2t_entry {
 	uint16_t state;			/* entry state */
 	uint16_t idx;			/* entry index */
-	uint32_t addr[4];		/* next hop IP or IPv6 address */
+	uint32_t addr;			/* next hop IP address */
 	struct ifnet *ifp;		/* outgoing interface */
 	uint16_t smt_idx;		/* SMT index */
 	uint16_t vlan;			/* VLAN TCI (id: 0-11, prio: 13-15) */
-	int ifindex;			/* interface index */
-	struct llentry *lle;		/* llentry for next hop */
 	struct l2t_entry *first;	/* start of hash chain */
 	struct l2t_entry *next;		/* next l2t_entry on chain */
-	struct mbuf *arpq_head;		/* list of mbufs awaiting resolution */
-	struct mbuf *arpq_tail;
+	STAILQ_HEAD(, wrqe) wr_list;	/* list of WRs awaiting resolution */
 	struct mtx lock;
 	volatile int refcnt;		/* entry reference count */
 	uint16_t hash;			/* hash bucket the entry is on */
-	uint8_t v6;			/* whether entry is for IPv6 */
 	uint8_t lport;			/* associated offload logical port */
 	uint8_t dmac[ETHER_ADDR_LEN];	/* next hop's MAC address */
 };
 
+struct l2t_data {
+	struct rwlock lock;
+	volatile int nfree;	/* number of free entries */
+	struct l2t_entry *rover;/* starting point for next allocation */
+	struct l2t_entry l2tab[L2T_SIZE];
+};
+
+
 int t4_init_l2t(struct adapter *, int);
 int t4_free_l2t(struct l2t_data *);
+struct l2t_entry *t4_alloc_l2e(struct l2t_data *);
 struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *);
 int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t,
     uint8_t, uint8_t *);
-void t4_l2t_release(struct l2t_entry *);
+int t4_write_l2e(struct adapter *, struct l2t_entry *, int);
+int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
+
+static inline void
+t4_l2t_release(struct l2t_entry *e)
+{
+	struct l2t_data *d = container_of(e, struct l2t_data, l2tab[e->idx]);
+
+	if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
+		atomic_add_int(&d->nfree, 1);
+}
+
+
 #ifdef SBUF_DRAIN
 int sysctl_l2t(SYSCTL_HANDLER_ARGS);
 #endif
 
-#ifndef TCP_OFFLOAD_DISABLE
-struct l2t_entry *t4_l2t_get(struct port_info *, struct ifnet *,
-    struct sockaddr *);
-int t4_l2t_send(struct adapter *, struct mbuf *, struct l2t_entry *);
-void t4_l2t_update(struct adapter *, struct llentry *);
-#endif
-
 #endif  /* __T4_L2T_H */
diff -r c30f3f89e105 -r 8a9451a986e0 head/sys/dev/cxgbe/t4_main.c
--- a/head/sys/dev/cxgbe/t4_main.c	Wed Jul 25 16:55:08 2012 +0300
+++ b/head/sys/dev/cxgbe/t4_main.c	Wed Jul 25 17:04:43 2012 +0300
@@ -26,9 +26,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_main.c 231172 2012-02-08 00:36:36Z np $");
+__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_main.c 238054 2012-07-03 06:56:11Z np $");
 
 #include "opt_inet.h"
+#include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -119,9 +120,13 @@
 
 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services");
 
+/*
+ * Correct lock order when you need to acquire multiple locks is t4_list_lock,
+ * then ADAPTER_LOCK, then t4_uld_list_lock.
+ */
 static struct mtx t4_list_lock;
 static SLIST_HEAD(, adapter) t4_list;
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 static struct mtx t4_uld_list_lock;
 static SLIST_HEAD(, uld_info) t4_uld_list;
 #endif
@@ -149,7 +154,7 @@
 static int t4_nrxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
 
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 #define NOFLDTXQ_10G 8
 static int t4_nofldtxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
@@ -174,7 +179,7 @@
 static int t4_tmr_idx_10g = TMR_IDX_10G;
 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);
 
-#define PKTC_IDX_10G 2
+#define PKTC_IDX_10G (-1)
 static int t4_pktc_idx_10g = PKTC_IDX_10G;
 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);
 
@@ -182,7 +187,7 @@
 static int t4_tmr_idx_1g = TMR_IDX_1G;
 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);
 
-#define PKTC_IDX_1G 2
+#define PKTC_IDX_1G (-1)
 static int t4_pktc_idx_1g = PKTC_IDX_1G;
 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
 
@@ -217,7 +222,7 @@
 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
 TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
 
-static int t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
+static int t4_toecaps_allowed = -1;
 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
 
 static int t4_rdmacaps_allowed = 0;
@@ -237,7 +242,7 @@
 	int nrxq10g;		/* # of NIC rxq's for each 10G port */
 	int ntxq1g;		/* # of NIC txq's for each 1G port */
 	int nrxq1g;		/* # of NIC rxq's for each 1G port */
-#ifndef TCP_OFFLOAD_DISABLE
+#ifdef TCP_OFFLOAD
 	int nofldtxq10g;	/* # of TOE txq's for each 10G port */
 	int nofldrxq10g;	/* # of TOE rxq's for each 10G port */
 	int nofldtxq1g;		/* # of TOE txq's for each 1G port */
@@ -297,8 +302,10 @@
     unsigned int);
 static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
 static void cxgbe_tick(void *);
+static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
 static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
+static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
 static int t4_sysctls(struct adapter *);
 static int cxgbe_sysctls(struct port_info *);
 static int sysctl_int_array(SYSCTL_