HardenedBSD/src b57b317. UPDATING, Makefile.inc1, contrib/blacklist/bin/blacklistd.c

MFHead @345353
DeltaFile
+1,346 -1,365  sys/netpfil/ipfw/nat64/nat64lsn.c
+240 -210  sys/netpfil/ipfw/nat64/nat64lsn_control.c
+168 -263  sys/netpfil/ipfw/nat64/nat64lsn.h
+162 -99  sbin/bectl/bectl_jail.c
+54 -72  sbin/ipfw/nat64lsn.c
+68 -22  tests/sys/netmap/ctrl-api-test.c
+49 -16  sys/dev/psci/psci.c
+40 -18  sys/dev/cpufreq/cpufreq_dt.c
+36 -17  sys/dev/cxgbe/common/t4_hw.c
+40 -13  sys/net/iflib.c
+37 -8  sys/netinet6/ip_fw_nat64.h
+19 -17  sys/dev/cxgbe/t4_main.c
+28 -2  sys/contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c
+19 -11  sbin/ipfw/ipfw.8
+23 -5  sys/ufs/ffs/ffs_softdep.c
+11 -11  sys/conf/files
+21 -0  contrib/llvm/tools/lld/ELF/Writer.cpp
+20 -0  share/man/man9/sysctl.9
+20 -0  sys/conf/kern.post.mk
+20 -0  sys/dev/cxgbe/firmware/t4fw_interface.h
+19 -0  sys/sys/sysctl.h
+10 -8  stand/common/load_elf.c
+9 -9  sys/arm/include/platformvar.h
+18 -0  sys/conf/kmod.mk
+12 -6  sys/vm/vm_fault.c
+7 -7  sys/fs/fuse/fuse_node.c
+13 -0  sys/modules/fusefs/Makefile
+5 -8  sys/dev/cxgbe/t4_sge.c
+5 -7  sys/net/bpf.c
+8 -4  contrib/llvm/tools/lld/ELF/InputFiles.cpp
+2 -9  sys/dev/ixgbe/if_ixv.c
+9 -1  release/tools/ec2.conf
+5 -5  sys/fs/fuse/fuse_main.c
+6 -4  sys/dev/cxgbe/common/common.h
+5 -5  sys/fs/fuse/fuse_ipc.c
+0 -10  sys/modules/fuse/Makefile
+3 -7  sys/dev/cxgbe/tom/t4_tom.c
+1 -8  sys/dev/e1000/if_em.c
+4 -4  sys/dev/sound/pci/hda/hdac.c
+1 -7  lib/libjail/jail.c
+4 -4  sys/fs/fuse/fuse_vnops.c
+1 -7  usr.sbin/makefs/tests/makefs_tests_common.sh
+7 -0  sys/dev/pci/pcivar.h
+5 -2  usr.sbin/makefs/mtree.c
+6 -1  contrib/openmp/runtime/src/kmp.h
+4 -2  contrib/llvm/tools/lld/ELF/Options.td
+6 -0  sys/amd64/sgx/sgx.c
+6 -0  contrib/llvm/tools/lld/docs/ld.lld.1
+3 -3  sys/fs/fuse/fuse_vfsops.c
+3 -3  contrib/llvm/tools/lld/ELF/SymbolTable.h
+5 -1  contrib/llvm/tools/lld/ELF/InputFiles.h
+6 -0  UPDATING
+6 -0  stand/common/interp_forth.c
+5 -1  release/Makefile.ec2
+4 -2  sys/dev/cxgbe/adapter.h
+1 -5  sys/dev/ixl/if_iavf.c
+2 -3  sys/dev/cxgbe/t4_sched.c
+1 -4  sys/dev/ixgbe/if_ix.c
+1 -4  sys/dev/ixl/ixl_pf_main.c
+2 -2  Makefile.inc1
+4 -0  sys/sys/stat.h
+3 -1  sys/net/iflib.h
+3 -0  contrib/llvm/tools/lld/ELF/Driver.cpp
+1 -1  sys/sys/param.h
+1 -1  sbin/mount_fusefs/mount_fusefs.c
+1 -1  share/mk/src.opts.mk
+1 -1  lib/libomp/Makefile
+1 -1  sys/conf/NOTES
+1 -1  lib/clang/include/llvm/Support/VCSRevision.h
+1 -1  lib/clang/include/lld/Common/Version.inc
+1 -1  sys/conf/options
+1 -1  lib/clang/include/clang/Basic/Version.inc
+2 -0  sys/dev/ahci/ahci_pci.c
+0 -2  lib/Makefile
+1 -1  contrib/openmp/runtime/src/kmp_runtime.cpp
+1 -1  sys/dev/cxgbe/tom/t4_cpl_io.c
+1 -1  sys/dev/cxgbe/tom/t4_listen.c
+1 -1  sys/dev/extres/phy/phy.c
+1 -1  sys/dev/extres/regulator/regulator.c
+1 -1  sys/dev/extres/syscon/syscon.c
+2 -0  sys/dev/ichsmb/ichsmb_pci.c
+1 -1  sys/dev/psci/psci.h
+1 -1  sys/dev/sound/pci/hda/hdacc.c
+1 -1  sys/fs/fuse/fuse.h
+1 -1  sys/fs/fuse/fuse_file.c
+1 -1  sys/modules/Makefile
+2 -0  sys/modules/ipfw_nat64/Makefile
+1 -1  contrib/llvm/tools/lld/ELF/SymbolTable.cpp
+1 -1  contrib/llvm/tools/clang/lib/Basic/Version.cpp
+1 -1  contrib/blacklist/bin/blacklistd.c
+2 -0  usr.bin/lockf/lockf.c
+0 -1  sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
+1 -0  sbin/ipfw/ipfw2.h
+1 -0  contrib/llvm/tools/lld/ELF/Config.h
+2,684 -2,329  94 files

UnifiedSplitRaw

Makefile.inc1
@@ -1162,7 +1162,7 @@ buildworld_epilogue: .PHONY
@echo "--------------------------------------------------------------" @echo "--------------------------------------------------------------"
@echo ">>> World build completed on `LC_ALL=C date`" @echo ">>> World build completed on `LC_ALL=C date`"
@seconds=$$(($$(date '+%s') - ${_BUILDWORLD_START})); \ @seconds=$$(($$(date '+%s') - ${_BUILDWORLD_START})); \
- echo -n ">>> World build in $$seconds seconds, "; \+ echo -n ">>> World built in $$seconds seconds, "; \
echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}" echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}"
@echo "--------------------------------------------------------------" @echo "--------------------------------------------------------------"
@@ -1648,7 +1648,7 @@ buildkernel: .MAKE .PHONY
.endfor .endfor
@seconds=$$(($$(date '+%s') - ${_BUILDKERNEL_START})); \ @seconds=$$(($$(date '+%s') - ${_BUILDKERNEL_START})); \
- echo -n ">>> Kernel(s) build for${BUILDKERNELS} in $$seconds seconds, "; \+ echo -n ">>> Kernel(s) ${BUILDKERNELS} built in $$seconds seconds, "; \
echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}" echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}"
@echo "--------------------------------------------------------------" @echo "--------------------------------------------------------------"
UPDATING
@@ -31,6 +31,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW:
disable the most expensive debugging functionality run disable the most expensive debugging functionality run
"ln -s 'abort:false,junk:false' /etc/malloc.conf".) "ln -s 'abort:false,junk:false' /etc/malloc.conf".)
+20190320:
+ The fuse(4) module has been renamed to fusefs(4) for consistency with
+ other filesystems. You should update any kld_load="fuse" entries in
+ /etc/rc.conf, fuse_load="YES" entries in /boot/loader.conf, and
+ "options FUSE" enties in kernel config files.
+
20190304: 20190304:
Clang, llvm, lld, lldb, compiler-rt and libc++ have been upgraded to Clang, llvm, lld, lldb, compiler-rt and libc++ have been upgraded to
8.0.0. Please see the 20141231 entry below for information about 8.0.0. Please see the 20141231 entry below for information about
contrib/blacklist/bin/blacklistd.c
@@ -328,7 +328,7 @@ again:
if (dbi.id[0]) { if (dbi.id[0]) {
run_change("rem", &c, dbi.id, 0); run_change("rem", &c, dbi.id, 0);
sockaddr_snprintf(buf, sizeof(buf), "%a", ss); sockaddr_snprintf(buf, sizeof(buf), "%a", ss);
- syslog(LOG_INFO, "released %s/%d:%d after %d seconds",+ (*lfun)(LOG_INFO, "released %s/%d:%d after %d seconds",
buf, c.c_lmask, c.c_port, c.c_duration); buf, c.c_lmask, c.c_port, c.c_duration);
} }
state_del(state, &c); state_del(state, &c);
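The blacklistd.c hunk above replaces a direct syslog(3) call with the daemon's logging function pointer, so the "released" message goes to whatever log target is configured (syslog, or stderr in debug mode) like the rest of blacklistd's output. Below is a standalone sketch of that pattern; only the (*lfun) name and the call shape are taken from the diff, everything else is illustrative.

/*
 * Sketch of logging through a function pointer.  Not blacklist code.
 */
#include <stdarg.h>
#include <stdio.h>
#include <syslog.h>

static void
stderr_log(int level, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	fprintf(stderr, "<%d> ", level);
	vfprintf(stderr, fmt, ap);
	fputc('\n', stderr);
	va_end(ap);
}

/* Default to syslog(3); a debug/foreground mode can repoint this at stderr. */
static void (*lfun)(int, const char *, ...) = syslog;

int
main(void)
{
	lfun = stderr_log;	/* e.g. when not daemonized */
	(*lfun)(LOG_INFO, "released %s/%d:%d after %d seconds",
	    "192.0.2.1", 32, 22, 600);
	return (0);
}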
contrib/llvm/tools/clang/lib/Basic/Version.cpp
@@ -36,7 +36,7 @@ std::string getClangRepositoryPath() {
// If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
// pick up a tag in an SVN export, for example. // pick up a tag in an SVN export, for example.
- StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/branches/release_80/lib/Basic/Version.cpp $");+ StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_800/final/lib/Basic/Version.cpp $");
if (URL.empty()) { if (URL.empty()) {
URL = SVNRepository.slice(SVNRepository.find(':'), URL = SVNRepository.slice(SVNRepository.find(':'),
SVNRepository.find("/lib/Basic")); SVNRepository.find("/lib/Basic"));
contrib/llvm/tools/lld/ELF/Config.h
@@ -122,6 +122,7 @@ struct Configuration {
uint64_t> uint64_t>
CallGraphProfile; CallGraphProfile;
bool AllowMultipleDefinition; bool AllowMultipleDefinition;
+ bool AllowShlibUndefined;
bool AndroidPackDynRelocs; bool AndroidPackDynRelocs;
bool ARMHasBlx = false; bool ARMHasBlx = false;
bool ARMHasMovtMovw = false; bool ARMHasMovtMovw = false;
contrib/llvm/tools/lld/ELF/Driver.cpp
@@ -758,6 +758,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Args.hasFlag(OPT_allow_multiple_definition, Args.hasFlag(OPT_allow_multiple_definition,
OPT_no_allow_multiple_definition, false) || OPT_no_allow_multiple_definition, false) ||
hasZOption(Args, "muldefs"); hasZOption(Args, "muldefs");
+ Config->AllowShlibUndefined =
+ Args.hasFlag(OPT_allow_shlib_undefined, OPT_no_allow_shlib_undefined,
+ Args.hasArg(OPT_shared));
Config->AuxiliaryList = args::getStrings(Args, OPT_auxiliary); Config->AuxiliaryList = args::getStrings(Args, OPT_auxiliary);
Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
contrib/llvm/tools/lld/ELF/InputFiles.cpp
@@ -865,7 +865,7 @@ SharedFile<ELFT>::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
// Partially parse the shared object file so that we can call // Partially parse the shared object file so that we can call
// getSoName on this object. // getSoName on this object.
-template <class ELFT> void SharedFile<ELFT>::parseSoName() {+template <class ELFT> void SharedFile<ELFT>::parseDynamic() {
const Elf_Shdr *DynamicSec = nullptr; const Elf_Shdr *DynamicSec = nullptr;
const ELFFile<ELFT> Obj = this->getObj(); const ELFFile<ELFT> Obj = this->getObj();
ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this); ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this);
@@ -902,12 +902,16 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() {
ArrayRef<Elf_Dyn> Arr = ArrayRef<Elf_Dyn> Arr =
CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), this); CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), this);
for (const Elf_Dyn &Dyn : Arr) { for (const Elf_Dyn &Dyn : Arr) {
- if (Dyn.d_tag == DT_SONAME) {+ if (Dyn.d_tag == DT_NEEDED) {
+ uint64_t Val = Dyn.getVal();
+ if (Val >= this->StringTable.size())
+ fatal(toString(this) + ": invalid DT_NEEDED entry");
+ DtNeeded.push_back(this->StringTable.data() + Val);
+ } else if (Dyn.d_tag == DT_SONAME) {
uint64_t Val = Dyn.getVal(); uint64_t Val = Dyn.getVal();
if (Val >= this->StringTable.size()) if (Val >= this->StringTable.size())
fatal(toString(this) + ": invalid DT_SONAME entry"); fatal(toString(this) + ": invalid DT_SONAME entry");
SoName = this->StringTable.data() + Val; SoName = this->StringTable.data() + Val;
- return;
} }
} }
} }
@@ -975,7 +979,7 @@ uint32_t SharedFile<ELFT>::getAlignment(ArrayRef<Elf_Shdr> Sections,
return (Ret > UINT32_MAX) ? 0 : Ret; return (Ret > UINT32_MAX) ? 0 : Ret;
} }
-// Fully parse the shared object file. This must be called after parseSoName().+// Fully parse the shared object file. This must be called after parseDynamic().
// //
// This function parses symbol versions. If a DSO has version information, // This function parses symbol versions. If a DSO has version information,
// the file has a ".gnu.version_d" section which contains symbol version // the file has a ".gnu.version_d" section which contains symbol version
contrib/llvm/tools/lld/ELF/InputFiles.h
@@ -323,6 +323,7 @@ template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
public: public:
std::vector<const Elf_Verdef *> Verdefs; std::vector<const Elf_Verdef *> Verdefs;
+ std::vector<StringRef> DtNeeded;
std::string SoName; std::string SoName;
static bool classof(const InputFile *F) { static bool classof(const InputFile *F) {
@@ -331,7 +332,7 @@ public:
SharedFile(MemoryBufferRef M, StringRef DefaultSoName); SharedFile(MemoryBufferRef M, StringRef DefaultSoName);
- void parseSoName();+ void parseDynamic();
void parseRest(); void parseRest();
uint32_t getAlignment(ArrayRef<Elf_Shdr> Sections, const Elf_Sym &Sym); uint32_t getAlignment(ArrayRef<Elf_Shdr> Sections, const Elf_Sym &Sym);
std::vector<const Elf_Verdef *> parseVerdefs(); std::vector<const Elf_Verdef *> parseVerdefs();
@@ -349,6 +350,9 @@ public:
// data structures in the output file. // data structures in the output file.
std::map<const Elf_Verdef *, NeededVer> VerdefMap; std::map<const Elf_Verdef *, NeededVer> VerdefMap;
+ // Used for --no-allow-shlib-undefined.
+ bool AllNeededIsKnown;
+
// Used for --as-needed // Used for --as-needed
bool IsNeeded; bool IsNeeded;
}; };
contrib/llvm/tools/lld/ELF/Options.td
@@ -63,6 +63,10 @@ defm allow_multiple_definition: B<"allow-multiple-definition",
"Allow multiple definitions", "Allow multiple definitions",
"Do not allow multiple definitions (default)">; "Do not allow multiple definitions (default)">;
+defm allow_shlib_undefined: B<"allow-shlib-undefined",
+ "Allow unresolved references in shared libraries (default when linking a shared library)",
+ "Do not allow unresolved references in shared libraries (default when linking an executable)">;
+
defm apply_dynamic_relocs: B<"apply-dynamic-relocs", defm apply_dynamic_relocs: B<"apply-dynamic-relocs",
"Apply link-time values for dynamic relocations", "Apply link-time values for dynamic relocations",
"Do not apply link-time values for dynamic relocations (default)">; "Do not apply link-time values for dynamic relocations (default)">;
@@ -492,12 +496,10 @@ def plugin_opt_thinlto: J<"plugin-opt=thinlto">;
def plugin_opt_slash: J<"plugin-opt=/">; def plugin_opt_slash: J<"plugin-opt=/">;
// Options listed below are silently ignored for now for compatibility. // Options listed below are silently ignored for now for compatibility.
-def: F<"allow-shlib-undefined">;
def: F<"detect-odr-violations">; def: F<"detect-odr-violations">;
def: Flag<["-"], "g">; def: Flag<["-"], "g">;
def: F<"long-plt">; def: F<"long-plt">;
def: F<"no-add-needed">; def: F<"no-add-needed">;
-def: F<"no-allow-shlib-undefined">;
def: F<"no-copy-dt-needed-entries">; def: F<"no-copy-dt-needed-entries">;
def: F<"no-ctors-in-init-array">; def: F<"no-ctors-in-init-array">;
def: F<"no-keep-memory">; def: F<"no-keep-memory">;
contrib/llvm/tools/lld/ELF/SymbolTable.cpp
@@ -93,7 +93,7 @@ template <class ELFT> void SymbolTable::addFile(InputFile *File) {
// .so file // .so file
if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) { if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) {
// DSOs are uniquified not by filename but by soname. // DSOs are uniquified not by filename but by soname.
- F->parseSoName();+ F->parseDynamic();
if (errorCount()) if (errorCount())
return; return;
contrib/llvm/tools/lld/ELF/SymbolTable.h
@@ -80,6 +80,9 @@ public:
void handleDynamicList(); void handleDynamicList();
+ // Set of .so files to not link the same shared object file more than once.
+ llvm::DenseMap<StringRef, InputFile *> SoNames;
+
private: private:
std::pair<Symbol *, bool> insertName(StringRef Name); std::pair<Symbol *, bool> insertName(StringRef Name);
@@ -107,9 +110,6 @@ private:
// is used to uniquify them. // is used to uniquify them.
llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups; llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;
- // Set of .so files to not link the same shared object file more than once.
- llvm::DenseMap<StringRef, InputFile *> SoNames;
-
// A map from demangled symbol names to their symbol objects. // A map from demangled symbol names to their symbol objects.
// This mapping is 1:N because two symbols with different versions // This mapping is 1:N because two symbols with different versions
// can have the same name. We use this map to handle "extern C++ {}" // can have the same name. We use this map to handle "extern C++ {}"
contrib/llvm/tools/lld/ELF/Writer.cpp
@@ -1668,6 +1668,27 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
if (In.Iplt && !In.Iplt->empty()) if (In.Iplt && !In.Iplt->empty())
In.Iplt->addSymbols(); In.Iplt->addSymbols();
+ if (!Config->AllowShlibUndefined) {
+ // Error on undefined symbols in a shared object, if all of its DT_NEEDED
+ // entries are seen. These cases would otherwise lead to runtime errors
+ // reported by the dynamic linker.
+ //
+ // ld.bfd traces all DT_NEEDED to emulate the logic of the dynamic linker to
+ // catch more cases. That is too much for us. Our approach resembles the one
+ // used in ld.gold, achieves a good balance to be useful but not too smart.
+ for (InputFile *File : SharedFiles) {
+ SharedFile<ELFT> *F = cast<SharedFile<ELFT>>(File);
+ F->AllNeededIsKnown = llvm::all_of(F->DtNeeded, [&](StringRef Needed) {
+ return Symtab->SoNames.count(Needed);
+ });
+ }
+ for (Symbol *Sym : Symtab->getSymbols())
+ if (Sym->isUndefined() && !Sym->isWeak())
+ if (auto *F = dyn_cast_or_null<SharedFile<ELFT>>(Sym->File))
+ if (F->AllNeededIsKnown)
+ error(toString(F) + ": undefined reference to " + toString(*Sym));
+ }
+
// Now that we have defined all possible global symbols including linker- // Now that we have defined all possible global symbols including linker-
// synthesized ones. Visit all symbols to give the finishing touches. // synthesized ones. Visit all symbols to give the finishing touches.
for (Symbol *Sym : Symtab->getSymbols()) { for (Symbol *Sym : Symtab->getSymbols()) {
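The new check above only fires for a DSO whose DT_NEEDED entries were all seen on the link line (AllNeededIsKnown), following ld.gold rather than ld.bfd. To inspect those tags outside of lld, here is a standalone C sketch using libelf/gelf (an assumption -- it is not part of this commit and does not use lld's classes) that prints a shared object's DT_SONAME and DT_NEEDED entries, i.e. exactly what parseDynamic() now records.

/*
 * Dump DT_SONAME and DT_NEEDED from a DSO's .dynamic section.
 * Illustrative only; build with -lelf.
 */
#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	Elf *e;
	Elf_Scn *scn = NULL;
	Elf_Data *data;
	GElf_Shdr shdr;
	GElf_Dyn dyn;
	size_t i, n;
	int fd;

	if (argc != 2 || elf_version(EV_CURRENT) == EV_NONE)
		exit(1);
	if ((fd = open(argv[1], O_RDONLY)) < 0)
		exit(1);
	if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
		exit(1);

	while ((scn = elf_nextscn(e, scn)) != NULL) {
		if (gelf_getshdr(scn, &shdr) == NULL ||
		    shdr.sh_type != SHT_DYNAMIC)
			continue;
		data = elf_getdata(scn, NULL);
		if (data == NULL || shdr.sh_entsize == 0)
			continue;
		n = shdr.sh_size / shdr.sh_entsize;
		for (i = 0; i < n; i++) {
			if (gelf_getdyn(data, i, &dyn) == NULL)
				break;
			/* sh_link points at .dynstr, where both names live. */
			if (dyn.d_tag == DT_NEEDED)
				printf("NEEDED %s\n", elf_strptr(e,
				    shdr.sh_link, dyn.d_un.d_val));
			else if (dyn.d_tag == DT_SONAME)
				printf("SONAME %s\n", elf_strptr(e,
				    shdr.sh_link, dyn.d_un.d_val));
		}
	}
	elf_end(e);
	close(fd);
	return (0);
}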
contrib/llvm/tools/lld/docs/ld.lld.1
@@ -56,6 +56,9 @@ option.
.It Fl -allow-multiple-definition .It Fl -allow-multiple-definition
Do not error if a symbol is defined multiple times. Do not error if a symbol is defined multiple times.
The first definition will be used. The first definition will be used.
+.It Fl -allow-shlib-undefined
+Allow unresolved references in shared libraries.
+This option is enabled by default when linking a shared library.
.It Fl -apply-dynamic-relocs .It Fl -apply-dynamic-relocs
Apply link-time values for dynamic relocations. Apply link-time values for dynamic relocations.
.It Fl -as-needed .It Fl -as-needed
@@ -252,6 +255,9 @@ Set target emulation.
.It Fl -Map Ns = Ns Ar file , Fl M Ar file .It Fl -Map Ns = Ns Ar file , Fl M Ar file
Print a link map to Print a link map to
.Ar file . .Ar file .
+.It Fl -no-allow-shlib-undefined
+Do not allow unresolved references in shared libraries.
+This option is enabled by default when linking an executable.
.It Fl -no-as-needed .It Fl -no-as-needed
Always set Always set
.Dv DT_NEEDED .Dv DT_NEEDED
contrib/openmp/runtime/src/kmp.h
@@ -3666,8 +3666,13 @@ extern int __kmp_read_from_file(char const *path, char const *format, ...);
extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
-#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))+#if __SSE__
+static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*(p)); }
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
+#else
+static inline void __kmp_load_mxcsr(const kmp_uint32 *) {}
+static inline void __kmp_store_mxcsr(kmp_uint32 *) {}
+#endif
extern void __kmp_load_x87_fpu_control_word(kmp_int16 *p); extern void __kmp_load_x87_fpu_control_word(kmp_int16 *p);
extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p); extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
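The kmp.h hunk turns the MXCSR accessors into inline functions and compiles them only when the target has SSE, so the newly enabled i386 libomp build does not break on non-SSE targets. A minimal illustration of the same guard, using generic names rather than the __kmp_* ones:

/*
 * Only touch MXCSR when the compiler targets SSE; otherwise provide
 * no-op stubs so callers compile unchanged.
 */
#include <stdint.h>

#if defined(__SSE__)
#include <xmmintrin.h>

static inline void load_mxcsr(const uint32_t *p) { _mm_setcsr(*p); }
static inline void store_mxcsr(uint32_t *p) { *p = _mm_getcsr(); }
#else
static inline void load_mxcsr(const uint32_t *p) { (void)p; }
static inline void store_mxcsr(uint32_t *p) { *p = 0; }
#endif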
contrib/openmp/runtime/src/kmp_runtime.cpp
@@ -8104,7 +8104,7 @@ __kmp_determine_reduction_method(
#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
-#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
// basic tuning // basic tuning
lib/Makefile
@@ -196,9 +196,7 @@ _libproc= libproc
_librtld_db= librtld_db _librtld_db= librtld_db
.endif .endif
-.if !defined(COMPAT_32BIT)
SUBDIR.${MK_OPENMP}+= libomp SUBDIR.${MK_OPENMP}+= libomp
-.endif
SUBDIR.${MK_OPENSSL}+= libmp SUBDIR.${MK_OPENSSL}+= libmp
SUBDIR.${MK_PMC}+= libpmc libpmcstat SUBDIR.${MK_PMC}+= libpmc libpmcstat
SUBDIR.${MK_RADIUS_SUPPORT}+= libradius SUBDIR.${MK_RADIUS_SUPPORT}+= libradius
lib/clang/include/clang/Basic/Version.inc
@@ -8,4 +8,4 @@
#define CLANG_VENDOR "FreeBSD " #define CLANG_VENDOR "FreeBSD "
-#define SVN_REVISION "356034"+#define SVN_REVISION "356365"
lib/clang/include/lld/Common/Version.inc
@@ -7,4 +7,4 @@
#define LLD_REPOSITORY_STRING "FreeBSD" #define LLD_REPOSITORY_STRING "FreeBSD"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style> // <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION_STRING "356034-1300002"+#define LLD_REVISION_STRING "356365-1300003"
lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,2 +1,2 @@
/* $FreeBSD$ */ /* $FreeBSD$ */
-#define LLVM_REVISION "svn-r356034"+#define LLVM_REVISION "svn-r356365"
lib/libjail/jail.c
@@ -1050,14 +1050,8 @@ kldload_param(const char *name)
kl = kldload(name); kl = kldload(name);
else if (strncmp(name, "allow.mount.", 12) == 0) { else if (strncmp(name, "allow.mount.", 12) == 0) {
/* Load the matching filesystem */ /* Load the matching filesystem */
- const char *modname;+ const char *modname = name + 12;
- if (strcmp("fusefs", name + 12) == 0 ||
- strcmp("nofusefs", name + 12) == 0) {
- modname = "fuse";
- } else {
- modname = name + 12;
- }
kl = kldload(modname); kl = kldload(modname);
if (kl < 0 && errno == ENOENT && if (kl < 0 && errno == ENOENT &&
strncmp(modname, "no", 2) == 0) strncmp(modname, "no", 2) == 0)
lib/libomp/Makefile
@@ -58,7 +58,6 @@ CXXFLAGS+= -fno-exceptions
CXXFLAGS+= -fno-rtti CXXFLAGS+= -fno-rtti
LDFLAGS+= -Wl,--warn-shared-textrel LDFLAGS+= -Wl,--warn-shared-textrel
-LDFLAGS+= -Wl,--as-needed
LDFLAGS+= -Wl,--gc-sections LDFLAGS+= -Wl,--gc-sections
LDFLAGS+= -Wl,-z,noexecstack LDFLAGS+= -Wl,-z,noexecstack
LDFLAGS+= -Wl,-fini=__kmp_internal_end_fini LDFLAGS+= -Wl,-fini=__kmp_internal_end_fini
@@ -67,5 +66,6 @@ LDFLAGS+= -Wl,-soname,libomp.so
VERSION_MAP= ${OMPSRC}/exports_so.txt VERSION_MAP= ${OMPSRC}/exports_so.txt
LIBADD+= pthread LIBADD+= pthread
+LIBADD+= m
.include <bsd.lib.mk> .include <bsd.lib.mk>
release/Makefile.ec2
@@ -42,6 +42,9 @@ PUBLICSNAP= --publicsnap
EC2SNSREL= ${REVISION}-${BRANCH} EC2SNSREL= ${REVISION}-${BRANCH}
EC2SNSVERS= ${EC2_SVNBRANCH}@${EC2_SVNREV} EC2SNSVERS= ${EC2_SVNBRANCH}@${EC2_SVNREV}
.endif .endif
+.if ${TARGET_ARCH} != "amd64"
+EC2ARCH= --${TARGET_ARCH:S/aarch64/arm64/}
+.endif
CLEANFILES+= ec2ami CLEANFILES+= ec2ami
@@ -82,7 +85,8 @@ ec2ami: cw-ec2 ${CW_EC2_PORTINSTALL}
@echo "--------------------------------------------------------------" @echo "--------------------------------------------------------------"
@false @false
.endif .endif
- /usr/local/bin/bsdec2-image-upload ${PUBLISH} ${PUBLICSNAP} --sriov --ena \+ /usr/local/bin/bsdec2-image-upload ${PUBLISH} ${PUBLICSNAP} \
+ ${EC2ARCH} --sriov --ena \
${.OBJDIR}/ec2.raw \ ${.OBJDIR}/ec2.raw \
"${TYPE} ${REVISION}-${BRANCH}-${TARGET}${AMINAMESUFFIX}" \ "${TYPE} ${REVISION}-${BRANCH}-${TARGET}${AMINAMESUFFIX}" \
"${TYPE}/${TARGET} ${EC2_SVNBRANCH}@${EC2_SVNREV}" \ "${TYPE}/${TARGET} ${EC2_SVNBRANCH}@${EC2_SVNREV}" \
release/tools/ec2.conf
@@ -6,7 +6,15 @@
# Packages to install into the image we're creating. This is a deliberately # Packages to install into the image we're creating. This is a deliberately
# minimalist set, providing only the packages necessary to bootstrap further # minimalist set, providing only the packages necessary to bootstrap further
# package installation as specified via EC2 user-data. # package installation as specified via EC2 user-data.
-export VM_EXTRA_PACKAGES="ec2-scripts firstboot-freebsd-update firstboot-pkgs dual-dhclient amazon-ssm-agent"+export VM_EXTRA_PACKAGES="ec2-scripts firstboot-freebsd-update firstboot-pkgs dual-dhclient"
+
+# Include the amazon-ssm-agent package in amd64 images, since some users want
+# to be able to use it on systems which are not connected to the Internet.
+# (It is not enabled by default, however.) This package does not exist for
+# aarch64, so we have to be selective about when we install it.
+if [ "${TARGET_ARCH}" = "amd64" ]; then
+ export VM_EXTRA_PACKAGES="${VM_EXTRA_PACKAGES} amazon-ssm-agent"
+fi
# Set to a list of third-party software to enable in rc.conf(5). # Set to a list of third-party software to enable in rc.conf(5).
export VM_RC_LIST="ec2_configinit ec2_fetchkey ec2_loghostkey firstboot_freebsd_update firstboot_pkgs ntpd" export VM_RC_LIST="ec2_configinit ec2_fetchkey ec2_loghostkey firstboot_freebsd_update firstboot_pkgs ntpd"
sbin/bectl/bectl_jail.c
@@ -40,10 +40,10 @@ __FBSDID("$FreeBSD$");
#include <unistd.h> #include <unistd.h>
#include <be.h> #include <be.h>
-
#include "bectl.h" #include "bectl.h"
-static void jailparam_grow(void);+#define MNTTYPE_ZFS 222
+
static void jailparam_add(const char *name, const char *val); static void jailparam_add(const char *name, const char *val);
static int jailparam_del(const char *name); static int jailparam_del(const char *name);
static bool jailparam_addarg(char *arg); static bool jailparam_addarg(char *arg);
@@ -51,84 +51,28 @@ static int jailparam_delarg(char *arg);
static int bectl_search_jail_paths(const char *mnt); static int bectl_search_jail_paths(const char *mnt);
static int bectl_locate_jail(const char *ident); static int bectl_locate_jail(const char *ident);
+static int bectl_jail_cleanup(char *mountpoint, int jid);
-/* We'll start with 8 parameters initially and grow as needed. */
-#define INIT_PARAMCOUNT 8
-
-static struct jailparam *jp;
-static int jpcnt;
-static int jpused;
static char mnt_loc[BE_MAXPATHLEN]; static char mnt_loc[BE_MAXPATHLEN];
+static nvlist_t *jailparams;
-static void+static const char *disabled_params[] = {
-jailparam_grow(void)+ "command", "exec.start", "nopersist", "persist", NULL
-{+};
- jpcnt *= 2;
- jp = realloc(jp, jpcnt * sizeof(*jp));
- if (jp == NULL)
- err(2, "realloc");
-}
static void static void
jailparam_add(const char *name, const char *val) jailparam_add(const char *name, const char *val)
{ {
- int i;
- for (i = 0; i < jpused; ++i) {+ nvlist_add_string(jailparams, name, val);
- if (strcmp(name, jp[i].jp_name) == 0)
- break;
- }
-
- if (i < jpused)
- jailparam_free(&jp[i], 1);
- else if (jpused == jpcnt)
- /* The next slot isn't allocated yet */
- jailparam_grow();
-
- if (jailparam_init(&jp[i], name) != 0)
- return;
- if (jailparam_import(&jp[i], val) != 0)
- return;
- ++jpused;
} }
static int static int
jailparam_del(const char *name) jailparam_del(const char *name)
{ {
- int i;
- char *val;
- for (i = 0; i < jpused; ++i) {+ nvlist_remove_all(jailparams, name);
- if (strcmp(name, jp[i].jp_name) == 0)
- break;
- }
-
- if (i == jpused)
- return (ENOENT);
-
- for (; i < jpused - 1; ++i) {
- val = jailparam_export(&jp[i + 1]);
-
- jailparam_free(&jp[i], 1);
- /*
- * Given the context, the following will really only fail if
- * they can't allocate the copy of the name or value.
- */
- if (jailparam_init(&jp[i], jp[i + 1].jp_name) != 0) {
- free(val);
- return (ENOMEM);
- }
- if (jailparam_import(&jp[i], val) != 0) {
- jailparam_free(&jp[i], 1);
- free(val);
- return (ENOMEM);
- }
- free(val);
- }
-
- jailparam_free(&jp[i], 1);
- --jpused;
return (0); return (0);
} }
@@ -136,6 +80,7 @@ static bool
jailparam_addarg(char *arg) jailparam_addarg(char *arg)
{ {
char *name, *val; char *name, *val;
+ size_t i, len;
if (arg == NULL) if (arg == NULL)
return (false); return (false);
@@ -156,6 +101,15 @@ jailparam_addarg(char *arg)
} }
strlcpy(mnt_loc, val, sizeof(mnt_loc)); strlcpy(mnt_loc, val, sizeof(mnt_loc));
} }
+
+ for (i = 0; disabled_params[i] != NULL; i++) {
+ len = strlen(disabled_params[i]);
+ if (strncmp(disabled_params[i], name, len) == 0) {
+ fprintf(stderr, "invalid jail parameter: %s\n", name);
+ return (false);
+ }
+ }
+
jailparam_add(name, val); jailparam_add(name, val);
return (true); return (true);
} }
@@ -176,21 +130,127 @@ jailparam_delarg(char *arg)
return (jailparam_del(name)); return (jailparam_del(name));
} }
+static int
+build_jailcmd(char ***argvp, bool interactive, int argc, char *argv[])
+{
+ char *cmd, **jargv, *name, *val;
+ nvpair_t *nvp;
+ size_t i, iarg, nargv;
+
+ cmd = NULL;
+ nvp = NULL;
+ iarg = i = 0;
+ if (nvlist_size(jailparams, &nargv, NV_ENCODE_NATIVE) != 0)
+ return (1);
+
+ /*
+ * Number of args + "/usr/sbin/jail", "-c", and ending NULL.
+ * If interactive also include command.
+ */
+ nargv += 3;
+ if (interactive) {
+ if (argc == 0)
+ nargv++;
+ else
+ nargv += argc;
+ }
+
+ jargv = *argvp = calloc(nargv, sizeof(jargv));
+ if (jargv == NULL)
+ err(2, "calloc");
+
+ jargv[iarg++] = strdup("/usr/sbin/jail");
+ jargv[iarg++] = strdup("-c");
+ while ((nvp = nvlist_next_nvpair(jailparams, nvp)) != NULL) {
+ name = nvpair_name(nvp);
+ if (nvpair_value_string(nvp, &val) != 0)
+ continue;
+
+ if (asprintf(&jargv[iarg++], "%s=%s", name, val) < 0)
+ goto error;
+ }
+ if (interactive) {
+ if (argc < 1)
+ cmd = strdup("/bin/sh");
+ else {
+ cmd = argv[0];
+ argc--;
+ argv++;
+ }
+
+ if (asprintf(&jargv[iarg++], "command=%s", cmd) < 0) {
+ goto error;
+ }
+ if (argc < 1) {
+ free(cmd);
+ cmd = NULL;
+ }
+
+ for (; argc > 0; argc--) {
+ if (asprintf(&jargv[iarg++], "%s", argv[0]) < 0)
+ goto error;
+ argv++;
+ }
+ }
+
+ return (0);
+
+error:
+ if (interactive && argc < 1)
+ free(cmd);
+ for (; i < iarg - 1; i++) {
+ free(jargv[i]);
+ }
+ free(jargv);
+ return (1);
+}
+
+/* Remove jail and cleanup any non zfs mounts. */
+static int
+bectl_jail_cleanup(char *mountpoint, int jid)
+{
+ struct statfs *mntbuf;
+ size_t i, searchlen, mntsize;
+
+ if (jid >= 0 && jail_remove(jid) != 0) {
+ fprintf(stderr, "unable to remove jail");
+ return (1);
+ }
+
+ searchlen = strnlen(mountpoint, MAXPATHLEN);
+ mntsize = getmntinfo(&mntbuf, MNT_NOWAIT);
+ for (i = 0; i < mntsize; i++) {
+ if (strncmp(mountpoint, mntbuf[i].f_mntonname, searchlen) == 0 &&
+ mntbuf[i].f_type != MNTTYPE_ZFS) {
+
+ if (unmount(mntbuf[i].f_mntonname, 0) != 0) {
+ fprintf(stderr, "bectl jail: unable to unmount filesystem %s",
+ mntbuf[i].f_mntonname);
+ return (1);
+ }
+ }
+ }
+
+ return (0);
+}
+
int int
bectl_cmd_jail(int argc, char *argv[]) bectl_cmd_jail(int argc, char *argv[])
{ {
- char *bootenv, *mountpoint;+ char *bootenv, **jargv, *mountpoint;
- int jid, mntflags, opt, ret;+ int i, jid, mntflags, opt, ret;
bool default_hostname, interactive, unjail; bool default_hostname, interactive, unjail;
pid_t pid; pid_t pid;
+
/* XXX TODO: Allow shallow */ /* XXX TODO: Allow shallow */
mntflags = BE_MNT_DEEP; mntflags = BE_MNT_DEEP;
default_hostname = interactive = unjail = true; default_hostname = interactive = unjail = true;
- jpcnt = INIT_PARAMCOUNT;+
- jp = malloc(jpcnt * sizeof(*jp));+ if ((nvlist_alloc(&jailparams, NV_UNIQUE_NAME, 0)) != 0) {
- if (jp == NULL)+ fprintf(stderr, "nvlist_alloc() failed\n");
- err(2, "malloc");+ return (1);
+ }
jailparam_add("persist", "true"); jailparam_add("persist", "true");
jailparam_add("allow.mount", "true"); jailparam_add("allow.mount", "true");
@@ -210,6 +270,8 @@ bectl_cmd_jail(int argc, char *argv[])
*/ */
if (strcmp(optarg, "host.hostname") == 0) if (strcmp(optarg, "host.hostname") == 0)
default_hostname = false; default_hostname = false;
+ } else {
+ return (1);
} }
break; break;
case 'U': case 'U':
@@ -236,13 +298,14 @@ bectl_cmd_jail(int argc, char *argv[])
argc -= optind; argc -= optind;
argv += optind; argv += optind;
- /* struct jail be_jail = { 0 }; */
if (argc < 1) { if (argc < 1) {
fprintf(stderr, "bectl jail: missing boot environment name\n"); fprintf(stderr, "bectl jail: missing boot environment name\n");
return (usage(false)); return (usage(false));
} }
bootenv = argv[0]; bootenv = argv[0];
+ argc--;
+ argv++;
/* /*
* XXX TODO: if its already mounted, perhaps there should be a flag to * XXX TODO: if its already mounted, perhaps there should be a flag to
@@ -264,45 +327,46 @@ bectl_cmd_jail(int argc, char *argv[])
* This is our indicator that path was not set by the user, so we'll use * This is our indicator that path was not set by the user, so we'll use
* the path that libbe generated for us. * the path that libbe generated for us.
*/ */
- if (mountpoint == NULL)+ if (mountpoint == NULL) {
jailparam_add("path", mnt_loc); jailparam_add("path", mnt_loc);
- /* Create the jail for now, attach later as-needed */+ mountpoint = mnt_loc;
- jid = jailparam_set(jp, jpused, JAIL_CREATE);
- if (jid == -1) {
- fprintf(stderr, "unable to create jail. error: %d\n", errno);
- return (1);
} }
- jailparam_free(jp, jpused);+ if ((build_jailcmd(&jargv, interactive, argc, argv)) != 0) {
- free(jp);+ fprintf(stderr, "unable to build argument list for jail command\n");
-+ return (1);
- /* We're not interactive, nothing more to do here. */+ }
- if (!interactive)
- return (0);
pid = fork(); pid = fork();
- switch(pid) {+
+ switch (pid) {
case -1: case -1:
perror("fork"); perror("fork");
return (1); return (1);
case 0: case 0:
- jail_attach(jid);+ execv("/usr/sbin/jail", jargv);
- /* We're attached within the jail... good bye! */+ fprintf(stderr, "bectl jail: failed to execute\n");
- chdir("/");
- if (argc > 1)
- execve(argv[1], &argv[1], NULL);
- else
- execl("/bin/sh", "/bin/sh", NULL);
- fprintf(stderr, "bectl jail: failed to execute %s\n",
- (argc > 1 ? argv[1] : "/bin/sh"));
- _exit(1);
default: default:
- /* Wait for the child to get back, see if we need to unjail */
waitpid(pid, NULL, 0); waitpid(pid, NULL, 0);
} }
+ for (i = 0; jargv[i] != NULL; i++) {
+ free(jargv[i]);
+ }
+ free(jargv);
+
+ if (!interactive)
+ return (0);
+
if (unjail) { if (unjail) {
- jail_remove(jid);+ /*
+ * We're not checking the jail id result here because in the
+ * case of invalid param, or last command in jail was an error
+ * the jail will not exist upon exit. bectl_jail_cleanup will
+ * only jail_remove if the jid is >= 0.
+ */
+ jid = bectl_locate_jail(bootenv);
+ bectl_jail_cleanup(mountpoint, jid);
be_unmount(be, bootenv, 0); be_unmount(be, bootenv, 0);
} }
@@ -319,7 +383,6 @@ bectl_search_jail_paths(const char *mnt)
/* jail_getv expects name/value strings */ /* jail_getv expects name/value strings */
snprintf(lastjid, sizeof(lastjid), "%d", 0); snprintf(lastjid, sizeof(lastjid), "%d", 0);
- jid = 0;
while ((jid = jail_getv(0, "lastjid", lastjid, "path", &jailpath, while ((jid = jail_getv(0, "lastjid", lastjid, "path", &jailpath,
NULL)) != -1) { NULL)) != -1) {
@@ -416,7 +479,7 @@ bectl_cmd_unjail(int argc, char *argv[])
return (1); return (1);
} }
- jail_remove(jid);+ bectl_jail_cleanup(path, jid);
be_unmount(be, target, 0); be_unmount(be, target, 0);
return (0); return (0);
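bectl_jail.c now keeps jail parameters in an nvlist instead of a hand-managed jailparam array, and build_jailcmd() walks that list to produce name=value arguments for /usr/sbin/jail. A small sketch of that add/iterate pattern follows; the <libnvpair.h> include path is an assumption (bectl itself gets the Solaris-style nvpair API via <be.h>), and the parameters shown are just the two defaults from the diff.

/* Illustrative only; link against libnvpair (or libbe). */
#include <libnvpair.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	nvlist_t *params;
	nvpair_t *nvp = NULL;
	char *val;

	if (nvlist_alloc(&params, NV_UNIQUE_NAME, 0) != 0)
		exit(1);

	/* The calls jailparam_add() now reduces to. */
	nvlist_add_string(params, "persist", "true");
	nvlist_add_string(params, "allow.mount", "true");

	/* The walk build_jailcmd() does to emit "name=value" arguments. */
	while ((nvp = nvlist_next_nvpair(params, nvp)) != NULL) {
		if (nvpair_value_string(nvp, &val) != 0)
			continue;
		printf("%s=%s\n", nvpair_name(nvp), val);
	}

	nvlist_free(params);
	return (0);
}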
sbin/ipfw/ipfw.8
@@ -1,7 +1,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
-.Dd March 18, 2019+.Dd March 19, 2019
.Dt IPFW 8 .Dt IPFW 8
.Os .Os
.Sh NAME .Sh NAME
@@ -3300,6 +3300,7 @@ See
.Sx SYSCTL VARIABLES .Sx SYSCTL VARIABLES
for more info. for more info.
.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION .Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Ss Stateful translation
.Nm .Nm
supports in-kernel IPv6/IPv4 network address and protocol translation. supports in-kernel IPv6/IPv4 network address and protocol translation.
Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers
@@ -3317,7 +3318,8 @@ to be able use stateful NAT64 translator.
Stateful NAT64 uses a bunch of memory for several types of objects. Stateful NAT64 uses a bunch of memory for several types of objects.
When IPv6 client initiates connection, NAT64 translator creates a host entry When IPv6 client initiates connection, NAT64 translator creates a host entry
in the states table. in the states table.
-Each host entry has a number of ports group entries allocated on demand.+Each host entry uses preallocated IPv4 alias entry.
+Each alias entry has a number of ports group entries allocated on demand.
Ports group entries contains connection state entries. Ports group entries contains connection state entries.
There are several options to control limits and lifetime for these objects. There are several options to control limits and lifetime for these objects.
.Pp .Pp
@@ -3337,6 +3339,11 @@ First time an original packet is handled and consumed by translator,
and then it is handled again as translated packet. and then it is handled again as translated packet.
This behavior can be changed by sysctl variable This behavior can be changed by sysctl variable
.Va net.inet.ip.fw.nat64_direct_output . .Va net.inet.ip.fw.nat64_direct_output .
+Also translated packet can be tagged using
+.Cm tag
+rule action, and then matched by
+.Cm tagged
+opcode to avoid loops and extra overhead.
.Pp .Pp
The stateful NAT64 configuration command is the following: The stateful NAT64 configuration command is the following:
.Bd -ragged -offset indent .Bd -ragged -offset indent
@@ -3364,15 +3371,16 @@ to represent IPv4 addresses. This IPv6 prefix should be configured in DNS64.
The translator implementation follows RFC6052, that restricts the length of The translator implementation follows RFC6052, that restricts the length of
prefixes to one of following: 32, 40, 48, 56, 64, or 96. prefixes to one of following: 32, 40, 48, 56, 64, or 96.
The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long. The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long.
-.It Cm max_ports Ar number+The special
-Maximum number of ports reserved for upper level protocols to one IPv6 client.+.Ar ::/length
-All reserved ports are divided into chunks between supported protocols.+prefix can be used to handle several IPv6 prefixes with one NAT64 instance.
-The number of connections from one IPv6 client is limited by this option.+The NAT64 instance will determine a destination IPv4 address from prefix
-Note that closed TCP connections still remain in the list of connections until+.Ar length .
-.Cm tcp_close_age+.It Cm states_chunks Ar number
-interval will not expire.+The number of states chunks in single ports group.
-Default value is+Each ports group by default can keep 64 state entries in single chunk.
-.Ar 2048 .+The above value affects the maximum number of states that can be associated with single IPv4 alias address and port.
+The value must be power of 2, and up to 128.
.It Cm host_del_age Ar seconds .It Cm host_del_age Ar seconds
The number of seconds until the host entry for a IPv6 client will be deleted The number of seconds until the host entry for a IPv6 client will be deleted
and all its resources will be released due to inactivity. and all its resources will be released due to inactivity.
sbin/ipfw/ipfw2.h
@@ -278,6 +278,7 @@ enum tokens {
TOK_AGG_LEN, TOK_AGG_LEN,
TOK_AGG_COUNT, TOK_AGG_COUNT,
TOK_MAX_PORTS, TOK_MAX_PORTS,
+ TOK_STATES_CHUNKS,
TOK_JMAXLEN, TOK_JMAXLEN,
TOK_PORT_RANGE, TOK_PORT_RANGE,
TOK_HOST_DEL_AGE, TOK_HOST_DEL_AGE,
sbin/ipfw/nat64lsn.c
@@ -87,68 +87,70 @@ nat64lsn_print_states(void *buf)
char sflags[4], *sf, *proto; char sflags[4], *sf, *proto;
ipfw_obj_header *oh; ipfw_obj_header *oh;
ipfw_obj_data *od; ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;+ ipfw_nat64lsn_stg_v1 *stg;
- ipfw_nat64lsn_state *ste;+ ipfw_nat64lsn_state_v1 *ste;
uint64_t next_idx; uint64_t next_idx;
int i, sz; int i, sz;
oh = (ipfw_obj_header *)buf; oh = (ipfw_obj_header *)buf;
od = (ipfw_obj_data *)(oh + 1); od = (ipfw_obj_data *)(oh + 1);
- stg = (ipfw_nat64lsn_stg *)(od + 1);+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
sz = od->head.length - sizeof(*od); sz = od->head.length - sizeof(*od);
next_idx = 0; next_idx = 0;
while (sz > 0 && next_idx != 0xFF) { while (sz > 0 && next_idx != 0xFF) {
- next_idx = stg->next_idx;+ next_idx = stg->next.index;
sz -= sizeof(*stg); sz -= sizeof(*stg);
if (stg->count == 0) { if (stg->count == 0) {
stg++; stg++;
continue; continue;
} }
- switch (stg->proto) {+ /*
- case IPPROTO_TCP:+ * NOTE: addresses are in network byte order,
- proto = "TCP";+ * ports are in host byte order.
- break;+ */
- case IPPROTO_UDP:
- proto = "UDP";
- break;
- case IPPROTO_ICMPV6:
- proto = "ICMPv6";
- break;
- }
- inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
inet_ntop(AF_INET, &stg->alias4, a, sizeof(a)); inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
- ste = (ipfw_nat64lsn_state *)(stg + 1);+ ste = (ipfw_nat64lsn_state_v1 *)(stg + 1);
for (i = 0; i < stg->count && sz > 0; i++) { for (i = 0; i < stg->count && sz > 0; i++) {
sf = sflags; sf = sflags;
+ inet_ntop(AF_INET6, &ste->host6, s, sizeof(s));
inet_ntop(AF_INET, &ste->daddr, f, sizeof(f)); inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
- if (stg->proto == IPPROTO_TCP) {+ switch (ste->proto) {
+ case IPPROTO_TCP:
+ proto = "TCP";
if (ste->flags & 0x02) if (ste->flags & 0x02)
*sf++ = 'S'; *sf++ = 'S';
if (ste->flags & 0x04) if (ste->flags & 0x04)
*sf++ = 'E'; *sf++ = 'E';
if (ste->flags & 0x01) if (ste->flags & 0x01)
*sf++ = 'F'; *sf++ = 'F';
+ break;
+ case IPPROTO_UDP:
+ proto = "UDP";
+ break;
+ case IPPROTO_ICMP:
+ proto = "ICMPv6";
+ break;
} }
*sf = '\0'; *sf = '\0';
- switch (stg->proto) {+ switch (ste->proto) {
case IPPROTO_TCP: case IPPROTO_TCP:
case IPPROTO_UDP: case IPPROTO_UDP:
printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n", printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
s, ste->sport, a, ste->aport, proto, s, ste->sport, a, ste->aport, proto,
sflags, ste->idle, f, ste->dport); sflags, ste->idle, f, ste->dport);
break; break;
- case IPPROTO_ICMPV6:+ case IPPROTO_ICMP:
printf("%s\t%s\t%s\t\t%d\t%s\n", printf("%s\t%s\t%s\t\t%d\t%s\n",
s, a, proto, ste->idle, f); s, a, proto, ste->idle, f);
break; break;
default: default:
printf("%s\t%s\t%d\t\t%d\t%s\n", printf("%s\t%s\t%d\t\t%d\t%s\n",
- s, a, stg->proto, ste->idle, f);+ s, a, ste->proto, ste->idle, f);
} }
ste++; ste++;
sz -= sizeof(*ste); sz -= sizeof(*ste);
} }
- stg = (ipfw_nat64lsn_stg *)ste;+ stg = (ipfw_nat64lsn_stg_v1 *)ste;
} }
return (next_idx); return (next_idx);
} }
@@ -174,6 +176,7 @@ nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
err(EX_OSERR, NULL); err(EX_OSERR, NULL);
do { do {
oh = (ipfw_obj_header *)buf; oh = (ipfw_obj_header *)buf;
+ oh->opheader.version = 1; /* Force using ov new API */
od = (ipfw_obj_data *)(oh + 1); od = (ipfw_obj_data *)(oh + 1);
nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set); nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set);
od->head.type = IPFW_TLV_OBJDATA; od->head.type = IPFW_TLV_OBJDATA;
@@ -363,12 +366,8 @@ nat64lsn_parse_int(const char *arg, const char *desc)
static struct _s_x nat64newcmds[] = { static struct _s_x nat64newcmds[] = {
{ "prefix6", TOK_PREFIX6 }, { "prefix6", TOK_PREFIX6 },
- { "agg_len", TOK_AGG_LEN }, /* not yet */
- { "agg_count", TOK_AGG_COUNT }, /* not yet */
- { "port_range", TOK_PORT_RANGE }, /* not yet */
{ "jmaxlen", TOK_JMAXLEN }, { "jmaxlen", TOK_JMAXLEN },
{ "prefix4", TOK_PREFIX4 }, { "prefix4", TOK_PREFIX4 },
- { "max_ports", TOK_MAX_PORTS },
{ "host_del_age", TOK_HOST_DEL_AGE }, { "host_del_age", TOK_HOST_DEL_AGE },
{ "pg_del_age", TOK_PG_DEL_AGE }, { "pg_del_age", TOK_PG_DEL_AGE },
{ "tcp_syn_age", TOK_TCP_SYN_AGE }, { "tcp_syn_age", TOK_TCP_SYN_AGE },
@@ -376,10 +375,13 @@ static struct _s_x nat64newcmds[] = {
{ "tcp_est_age", TOK_TCP_EST_AGE }, { "tcp_est_age", TOK_TCP_EST_AGE },
{ "udp_age", TOK_UDP_AGE }, { "udp_age", TOK_UDP_AGE },
{ "icmp_age", TOK_ICMP_AGE }, { "icmp_age", TOK_ICMP_AGE },
+ { "states_chunks",TOK_STATES_CHUNKS },
{ "log", TOK_LOG }, { "log", TOK_LOG },
{ "-log", TOK_LOGOFF }, { "-log", TOK_LOGOFF },
{ "allow_private", TOK_PRIVATE }, { "allow_private", TOK_PRIVATE },
{ "-allow_private", TOK_PRIVATEOFF }, { "-allow_private", TOK_PRIVATEOFF },
+ /* for compatibility with old configurations */
+ { "max_ports", TOK_MAX_PORTS }, /* unused */
{ NULL, 0 } { NULL, 0 }
}; };
@@ -436,34 +438,10 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6, nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
&cfg->plen6); &cfg->plen6);
if (ipfw_check_nat64prefix(&cfg->prefix6, if (ipfw_check_nat64prefix(&cfg->prefix6,
- cfg->plen6) != 0)+ cfg->plen6) != 0 &&
+ !IN6_IS_ADDR_UNSPECIFIED(&cfg->prefix6))
errx(EX_USAGE, "Bad prefix6 %s", *av); errx(EX_USAGE, "Bad prefix6 %s", *av);
- ac--; av++;
- break;
-#if 0
- case TOK_AGG_LEN:
- NEED1("Aggregation prefix len required");
- cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_AGG_COUNT:
- NEED1("Max per-prefix count required");
- cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_PORT_RANGE:
- NEED1("port range x[:y] required");
- if ((p = strchr(*av, ':')) == NULL)
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- else {
- *p++ = '\0';
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- cfg->max_port = (uint16_t)nat64lsn_parse_int(
- p, opt);
- }
ac--; av++; ac--; av++;
break; break;
case TOK_JMAXLEN: case TOK_JMAXLEN:
@@ -471,7 +449,6 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
cfg->jmaxlen = nat64lsn_parse_int(*av, opt); cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
ac--; av++; ac--; av++;
break; break;
-#endif
case TOK_MAX_PORTS: case TOK_MAX_PORTS:
NEED1("Max per-user ports required"); NEED1("Max per-user ports required");
cfg->max_ports = nat64lsn_parse_int(*av, opt); cfg->max_ports = nat64lsn_parse_int(*av, opt);
@@ -519,6 +496,12 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
*av, opt); *av, opt);
ac--; av++; ac--; av++;
break; break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG: case TOK_LOG:
cfg->flags |= NAT64_LOG; cfg->flags |= NAT64_LOG;
break; break;
@@ -630,6 +613,12 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
*av, opt); *av, opt);
ac--; av++; ac--; av++;
break; break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG: case TOK_LOG:
cfg->flags |= NAT64_LOG; cfg->flags |= NAT64_LOG;
break; break;
@@ -789,31 +778,24 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4); printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4);
inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf)); inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
printf(" prefix6 %s/%u", abuf, cfg->plen6); printf(" prefix6 %s/%u", abuf, cfg->plen6);
-#if 0+ if (co.verbose || cfg->states_chunks > 1)
- printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,+ printf(" states_chunks %u", cfg->states_chunks);
- cfg->agg_prefix_max);+ if (co.verbose || cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
- if (cfg->min_port != NAT64LSN_PORT_MIN ||
- cfg->max_port != NAT64LSN_PORT_MAX)
- printf(" port_range %u:%u", cfg->min_port, cfg->max_port);
- if (cfg->jmaxlen != NAT64LSN_JMAXLEN)
- printf(" jmaxlen %u ", cfg->jmaxlen);
-#endif
- if (cfg->max_ports != NAT64LSN_MAX_PORTS)
- printf(" max_ports %u", cfg->max_ports);
- if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
printf(" host_del_age %u", cfg->nh_delete_delay); printf(" host_del_age %u", cfg->nh_delete_delay);
- if (cfg->pg_delete_delay != NAT64LSN_PG_AGE)+ if (co.verbose || cfg->pg_delete_delay != NAT64LSN_PG_AGE)
- printf(" pg_del_age %u ", cfg->pg_delete_delay);+ printf(" pg_del_age %u", cfg->pg_delete_delay);
- if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)+ if (co.verbose || cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
printf(" tcp_syn_age %u", cfg->st_syn_ttl); printf(" tcp_syn_age %u", cfg->st_syn_ttl);
- if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)+ if (co.verbose || cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
printf(" tcp_close_age %u", cfg->st_close_ttl); printf(" tcp_close_age %u", cfg->st_close_ttl);
- if (cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)+ if (co.verbose || cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
printf(" tcp_est_age %u", cfg->st_estab_ttl); printf(" tcp_est_age %u", cfg->st_estab_ttl);
- if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE)+ if (co.verbose || cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
printf(" udp_age %u", cfg->st_udp_ttl); printf(" udp_age %u", cfg->st_udp_ttl);
- if (cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)+ if (co.verbose || cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
printf(" icmp_age %u", cfg->st_icmp_ttl); printf(" icmp_age %u", cfg->st_icmp_ttl);
+ if (co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN)
+ printf(" jmaxlen %u", cfg->jmaxlen);
if (cfg->flags & NAT64_LOG) if (cfg->flags & NAT64_LOG)
printf(" log"); printf(" log");
if (cfg->flags & NAT64_ALLOW_PRIVATE) if (cfg->flags & NAT64_ALLOW_PRIVATE)
sbin/mount_fusefs/mount_fusefs.c
@@ -501,7 +501,7 @@ init_backgrounded(void)
len = sizeof(ibg); len = sizeof(ibg);
- if (sysctlbyname("vfs.fuse.init_backgrounded", &ibg, &len, NULL, 0))+ if (sysctlbyname("vfs.fusefs.init_backgrounded", &ibg, &len, NULL, 0))
return (0); return (0);
return (ibg); return (ibg);
share/man/man9/sysctl.9
@@ -48,6 +48,7 @@
.Nm SYSCTL_ADD_SBINTIME_MSEC , .Nm SYSCTL_ADD_SBINTIME_MSEC ,
.Nm SYSCTL_ADD_SBINTIME_USEC , .Nm SYSCTL_ADD_SBINTIME_USEC ,
.Nm SYSCTL_ADD_STRING , .Nm SYSCTL_ADD_STRING ,
+.Nm SYSCTL_ADD_CONST_STRING ,
.Nm SYSCTL_ADD_STRUCT , .Nm SYSCTL_ADD_STRUCT ,
.Nm SYSCTL_ADD_U8 , .Nm SYSCTL_ADD_U8 ,
.Nm SYSCTL_ADD_U16 , .Nm SYSCTL_ADD_U16 ,
@@ -82,6 +83,7 @@
.Nm SYSCTL_SBINTIME_MSEC , .Nm SYSCTL_SBINTIME_MSEC ,
.Nm SYSCTL_SBINTIME_USEC , .Nm SYSCTL_SBINTIME_USEC ,
.Nm SYSCTL_STRING , .Nm SYSCTL_STRING ,
+.Nm SYSCTL_CONST_STRING ,
.Nm SYSCTL_STRUCT , .Nm SYSCTL_STRUCT ,
.Nm SYSCTL_U8 , .Nm SYSCTL_U8 ,
.Nm SYSCTL_U16 , .Nm SYSCTL_U16 ,
@@ -291,6 +293,16 @@
.Fa "const char *descr" .Fa "const char *descr"
.Fc .Fc
.Ft struct sysctl_oid * .Ft struct sysctl_oid *
+.Fo SYSCTL_ADD_CONST_STRING
+.Fa "struct sysctl_ctx_list *ctx"
+.Fa "struct sysctl_oid_list *parent"
+.Fa "int number"
+.Fa "const char *name"
+.Fa "int ctlflags"
+.Fa "const char *ptr"
+.Fa "const char *descr"
+.Fc
+.Ft struct sysctl_oid *
.Fo SYSCTL_ADD_STRUCT .Fo SYSCTL_ADD_STRUCT
.Fa "struct sysctl_ctx_list *ctx" .Fa "struct sysctl_ctx_list *ctx"
.Fa "struct sysctl_oid_list *parent" .Fa "struct sysctl_oid_list *parent"
@@ -443,6 +455,7 @@
.Fn SYSCTL_SBINTIME_MSEC parent number name ctlflags ptr descr .Fn SYSCTL_SBINTIME_MSEC parent number name ctlflags ptr descr
.Fn SYSCTL_SBINTIME_USEC parent number name ctlflags ptr descr .Fn SYSCTL_SBINTIME_USEC parent number name ctlflags ptr descr
.Fn SYSCTL_STRING parent number name ctlflags arg len descr .Fn SYSCTL_STRING parent number name ctlflags arg len descr
+.Fn SYSCTL_CONST_STRING parent number name ctlflags arg descr
.Fn SYSCTL_STRUCT parent number name ctlflags ptr struct_type descr .Fn SYSCTL_STRUCT parent number name ctlflags ptr struct_type descr
.Fn SYSCTL_U8 parent number name ctlflags ptr val descr .Fn SYSCTL_U8 parent number name ctlflags ptr val descr
.Fn SYSCTL_U16 parent number name ctlflags ptr val descr .Fn SYSCTL_U16 parent number name ctlflags ptr val descr
@@ -607,6 +620,11 @@ If the
.Fa len .Fa len
argument in zero, the string length is computed at every access to the OID using argument in zero, the string length is computed at every access to the OID using
.Xr strlen 3 . .Xr strlen 3 .
+Use the
+.Fn SYSCTL_CONST_STRING
+macro or the
+.Fn SYSCTL_ADD_CONST_STRING
+function to add a sysctl for a constant string.
.Sh CREATING OPAQUE SYSCTLS .Sh CREATING OPAQUE SYSCTLS
The The
.Fn SYSCTL_OPAQUE .Fn SYSCTL_OPAQUE
@@ -658,6 +676,7 @@ Static sysctls are declared using one of the
.Fn SYSCTL_SBINTIME_MSEC , .Fn SYSCTL_SBINTIME_MSEC ,
.Fn SYSCTL_SBINTIME_USEC , .Fn SYSCTL_SBINTIME_USEC ,
.Fn SYSCTL_STRING , .Fn SYSCTL_STRING ,
+.Fn SYSCTL_CONST_STRING ,
.Fn SYSCTL_STRUCT , .Fn SYSCTL_STRUCT ,
.Fn SYSCTL_U8 , .Fn SYSCTL_U8 ,
.Fn SYSCTL_U16 , .Fn SYSCTL_U16 ,
@@ -690,6 +709,7 @@ Dynamic nodes are created using one of the
.Fn SYSCTL_ADD_SBINTIME_MSEC , .Fn SYSCTL_ADD_SBINTIME_MSEC ,
.Fn SYSCTL_ADD_SBINTIME_USEC , .Fn SYSCTL_ADD_SBINTIME_USEC ,
.Fn SYSCTL_ADD_STRING , .Fn SYSCTL_ADD_STRING ,
+.Fn SYSCTL_ADD_CONST_STRING ,
.Fn SYSCTL_ADD_STRUCT , .Fn SYSCTL_ADD_STRUCT ,
.Fn SYSCTL_ADD_U8 , .Fn SYSCTL_ADD_U8 ,
.Fn SYSCTL_ADD_U16 , .Fn SYSCTL_ADD_U16 ,
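The sysctl.9 additions document the new CONST_STRING variants declared in sys/sys/sysctl.h. A minimal usage sketch following the documented argument order (the node and string names here are made up); because the string is const, the OID is read-only and no length argument is needed:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static const char example_version[] = "example 1.0";

SYSCTL_NODE(_kern, OID_AUTO, example, CTLFLAG_RD, 0,
    "example subtree");
SYSCTL_CONST_STRING(_kern_example, OID_AUTO, version, CTLFLAG_RD,
    example_version, "Example version string");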
share/mk/src.opts.mk
@@ -399,7 +399,7 @@ BROKEN_OPTIONS+=NVME
BROKEN_OPTIONS+=BSD_CRTBEGIN BROKEN_OPTIONS+=BSD_CRTBEGIN
.endif .endif
-.if ${COMPILER_FEATURES:Mc++11} && ${__T} == "amd64"+.if ${COMPILER_FEATURES:Mc++11} && (${__T} == "amd64" || ${__T} == "i386")
__DEFAULT_YES_OPTIONS+=OPENMP __DEFAULT_YES_OPTIONS+=OPENMP
.else .else
__DEFAULT_NO_OPTIONS+=OPENMP __DEFAULT_NO_OPTIONS+=OPENMP
stand/common/interp_forth.c
@@ -283,6 +283,12 @@ bf_init(void)
/* try to load and run init file if present */ /* try to load and run init file if present */
if ((fd = open("/boot/boot.4th", O_RDONLY)) != -1) { if ((fd = open("/boot/boot.4th", O_RDONLY)) != -1) {
+#ifdef LOADER_VERIEXEC
+ if (verify_file(fd, "/boot/boot.4th", 0, VE_GUESS) < 0) {
+ close(fd);
+ return;
+ }
+#endif
(void)ficlExecFD(bf_vm, fd); (void)ficlExecFD(bf_vm, fd);
close(fd); close(fd);
} }
stand/common/load_elf.c
@@ -868,14 +868,16 @@ fake_modname(const char *name)
sp++; sp++;
else else
sp = name; sp = name;
- ep = strrchr(name, '.');+
- if (ep) {+ ep = strrchr(sp, '.');
- if (ep == name) {+ if (ep == NULL) {
- sp = invalid_name;+ ep = sp + strlen(sp);
- ep = invalid_name + sizeof(invalid_name) - 1;+ }
- }+ if (ep == sp) {
- } else+ sp = invalid_name;
- ep = name + strlen(name);+ ep = invalid_name + sizeof(invalid_name) - 1;
+ }
+
len = ep - sp; len = ep - sp;
fp = malloc(len + 1); fp = malloc(len + 1);
if (fp == NULL) if (fp == NULL)
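The fake_modname() fix looks for the extension dot in the basename only (strrchr(sp, '.') instead of strrchr(name, '.')), so a dot in a directory component can no longer yield a bogus or negative-length module name. A standalone sketch of the corrected logic, with made-up test paths and a made-up placeholder string for invalid_name:

#include <stdio.h>
#include <string.h>

static const char invalid_name[] = "(invalid_name)";

static void
fake_modname(const char *name, char *out, size_t outlen)
{
	const char *sp, *ep;
	size_t len;

	sp = strrchr(name, '/');
	sp = (sp != NULL) ? sp + 1 : name;

	ep = strrchr(sp, '.');		/* search the basename only */
	if (ep == NULL)
		ep = sp + strlen(sp);
	if (ep == sp) {			/* basename is just an extension */
		sp = invalid_name;
		ep = invalid_name + sizeof(invalid_name) - 1;
	}

	len = (size_t)(ep - sp);
	if (len >= outlen)
		len = outlen - 1;
	memcpy(out, sp, len);
	out[len] = '\0';
}

int
main(void)
{
	char buf[64];

	fake_modname("/boot/kernel/if_em.ko", buf, sizeof(buf));
	printf("%s\n", buf);	/* if_em */
	fake_modname("/boot/kernel.old/accf_data", buf, sizeof(buf));
	printf("%s\n", buf);	/* accf_data; the old code tripped over the dot in "kernel.old" */
	fake_modname("/boot/modules/.hidden", buf, sizeof(buf));
	printf("%s\n", buf);	/* (invalid_name) */
	return (0);
}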
sys/amd64/sgx/sgx.c
@@ -1075,6 +1075,12 @@ sgx_get_epc_area(struct sgx_softc *sc)
(cp[2] & 0xfffff000); (cp[2] & 0xfffff000);
sc->npages = sc->epc_size / SGX_PAGE_SIZE; sc->npages = sc->epc_size / SGX_PAGE_SIZE;
+ if (sc->epc_size == 0 || sc->epc_base == 0) {
+ printf("%s: Incorrect EPC data: EPC base %lx, size %lu\n",
+ __func__, sc->epc_base, sc->epc_size);
+ return (EINVAL);
+ }
+
if (cp[3] & 0xffff) if (cp[3] & 0xffff)
sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff)); sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff));
else else
sys/arm/include/platformvar.h
@@ -90,22 +90,22 @@ typedef struct fdt_platform_class fdt_platform_def_t;
extern platform_method_t fdt_platform_methods[]; extern platform_method_t fdt_platform_methods[];
-#define FDT_PLATFORM_DEF2(NAME, VAR_NAME, NAME_STR, size, compatible, \+#define FDT_PLATFORM_DEF2(NAME, VAR_NAME, NAME_STR, _size, _compatible, \
- delay) \+ _delay) \
-CTASSERT(delay > 0); \+CTASSERT(_delay > 0); \
static fdt_platform_def_t VAR_NAME ## _fdt_platform = { \ static fdt_platform_def_t VAR_NAME ## _fdt_platform = { \
.name = NAME_STR, \ .name = NAME_STR, \
.methods = fdt_platform_methods, \ .methods = fdt_platform_methods, \
- .fdt_compatible = compatible, \+ .fdt_compatible = _compatible, \
}; \ }; \
static kobj_class_t VAR_NAME ## _baseclasses[] = \ static kobj_class_t VAR_NAME ## _baseclasses[] = \
{ (kobj_class_t)&VAR_NAME ## _fdt_platform, NULL }; \ { (kobj_class_t)&VAR_NAME ## _fdt_platform, NULL }; \
static platform_def_t VAR_NAME ## _platform = { \ static platform_def_t VAR_NAME ## _platform = { \
- NAME_STR, \+ .name = NAME_STR, \
- NAME ## _methods, \+ .methods = NAME ## _methods, \
- size, \+ .size = _size, \
- VAR_NAME ## _baseclasses, \+ .baseclasses = VAR_NAME ## _baseclasses, \
- delay, \+ .delay_count = _delay, \
}; \ }; \
DATA_SET(platform_set, VAR_NAME ## _platform) DATA_SET(platform_set, VAR_NAME ## _platform)
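The platformvar.h hunk renames the macro parameters (so they cannot shadow the structure field names) and switches the platform_def_t initializer from positional to designated form. A tiny illustration of why the designated form is preferred; the struct below is a stand-in, not the real platform_def_t:

#include <stddef.h>

struct platform_like {
	const char	*name;
	void		*methods;
	size_t		 size;
	int		 delay_count;
};

/* Still correct even if fields are later added or reordered. */
static struct platform_like example_platform = {
	.name = "example",
	.methods = NULL,
	.size = sizeof(struct platform_like),
	.delay_count = 1,
};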
sys/conf/NOTES
@@ -1071,7 +1071,7 @@ options NFSCL #Network File System client
options AUTOFS #Automounter filesystem options AUTOFS #Automounter filesystem
options CD9660 #ISO 9660 filesystem options CD9660 #ISO 9660 filesystem
options FDESCFS #File descriptor filesystem options FDESCFS #File descriptor filesystem
-options FUSE #FUSE support module+options FUSEFS #FUSEFS support module
options MSDOSFS #MS DOS File System (FAT, FAT32) options MSDOSFS #MS DOS File System (FAT, FAT32)
options NFSLOCKD #Network Lock Manager options NFSLOCKD #Network Lock Manager
options NFSD #Network Filesystem Server options NFSD #Network Filesystem Server
sys/conf/files
@@ -3494,15 +3494,15 @@ fs/fdescfs/fdesc_vfsops.c optional fdescfs
fs/fdescfs/fdesc_vnops.c optional fdescfs fs/fdescfs/fdesc_vnops.c optional fdescfs
fs/fifofs/fifo_vnops.c standard fs/fifofs/fifo_vnops.c standard
fs/cuse/cuse.c optional cuse fs/cuse/cuse.c optional cuse
-fs/fuse/fuse_device.c optional fuse+fs/fuse/fuse_device.c optional fusefs
-fs/fuse/fuse_file.c optional fuse+fs/fuse/fuse_file.c optional fusefs
-fs/fuse/fuse_internal.c optional fuse+fs/fuse/fuse_internal.c optional fusefs
-fs/fuse/fuse_io.c optional fuse+fs/fuse/fuse_io.c optional fusefs
-fs/fuse/fuse_ipc.c optional fuse+fs/fuse/fuse_ipc.c optional fusefs
-fs/fuse/fuse_main.c optional fuse+fs/fuse/fuse_main.c optional fusefs
-fs/fuse/fuse_node.c optional fuse+fs/fuse/fuse_node.c optional fusefs
-fs/fuse/fuse_vfsops.c optional fuse+fs/fuse/fuse_vfsops.c optional fusefs
-fs/fuse/fuse_vnops.c optional fuse+fs/fuse/fuse_vnops.c optional fusefs
fs/msdosfs/msdosfs_conv.c optional msdosfs fs/msdosfs/msdosfs_conv.c optional msdosfs
fs/msdosfs/msdosfs_denode.c optional msdosfs fs/msdosfs/msdosfs_denode.c optional msdosfs
fs/msdosfs/msdosfs_fat.c optional msdosfs fs/msdosfs/msdosfs_fat.c optional msdosfs
@@ -4398,9 +4398,9 @@ netpfil/ipfw/nat64/nat64clat.c optional inet inet6 ipfirewall \
netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \
ipfirewall_nat64 ipfirewall_nat64
netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
- ipfirewall_nat64+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
- ipfirewall_nat64+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
ipfirewall_nat64 ipfirewall_nat64
netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \
sys/conf/kern.post.mk
@@ -140,6 +140,8 @@ kernel-obj:
.if !defined(NO_MODULES) .if !defined(NO_MODULES)
modules: modules-all modules: modules-all
+modules-depend: beforebuild
+modules-all: beforebuild
.if !defined(NO_MODULES_OBJ) .if !defined(NO_MODULES_OBJ)
modules-all modules-depend: modules-obj modules-all modules-depend: modules-obj
@@ -328,6 +330,11 @@ ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}}
.depend: .PRECIOUS ${SRCS} .depend: .PRECIOUS ${SRCS}
+.if ${COMPILER_TYPE} == "clang" || \
+ (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000)
+_MAP_DEBUG_PREFIX= yes
+.endif
+
_ILINKS= machine _ILINKS= machine
.if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64" .if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64"
_ILINKS+= ${MACHINE_CPUARCH} _ILINKS+= ${MACHINE_CPUARCH}
@@ -337,12 +344,25 @@ _ILINKS+= x86
.endif .endif
# Ensure that the link exists without depending on it when it exists. # Ensure that the link exists without depending on it when it exists.
+# Ensure that debug info references the path in the source tree.
.for _link in ${_ILINKS} .for _link in ${_ILINKS}
.if !exists(${.OBJDIR}/${_link}) .if !exists(${.OBJDIR}/${_link})
${SRCS} ${CLEAN:M*.o}: ${_link} ${SRCS} ${CLEAN:M*.o}: ${_link}
.endif .endif
+.if defined(_MAP_DEBUG_PREFIX)
+.if ${_link} == "machine"
+CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include
+.else
+CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include
+.endif
+.endif
.endfor .endfor
+.if defined(_MAP_DEBUG_PREFIX)
+# Ensure that DWARF info contains a full path for auto-generated headers.
+CFLAGS+= -fdebug-prefix-map=.=${.OBJDIR}
+.endif
+
${_ILINKS}: ${_ILINKS}:
@case ${.TARGET} in \ @case ${.TARGET} in \
machine) \ machine) \
sys/conf/kmod.mk
@@ -267,6 +267,11 @@ ${FULLPROG}: ${OBJS}
${OBJCOPY} --strip-debug ${.TARGET} ${OBJCOPY} --strip-debug ${.TARGET}
.endif .endif
+.if ${COMPILER_TYPE} == "clang" || \
+ (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000)
+_MAP_DEBUG_PREFIX= yes
+.endif
+
_ILINKS=machine _ILINKS=machine
.if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64" .if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64"
_ILINKS+=${MACHINE_CPUARCH} _ILINKS+=${MACHINE_CPUARCH}
@@ -283,12 +288,25 @@ beforebuild: ${_ILINKS}
# Ensure that the links exist without depending on it when it exists which # Ensure that the links exist without depending on it when it exists which
# causes all the modules to be rebuilt when the directory pointed to changes. # causes all the modules to be rebuilt when the directory pointed to changes.
+# Ensure that debug info references the path in the source tree.
.for _link in ${_ILINKS} .for _link in ${_ILINKS}
.if !exists(${.OBJDIR}/${_link}) .if !exists(${.OBJDIR}/${_link})
OBJS_DEPEND_GUESS+= ${_link} OBJS_DEPEND_GUESS+= ${_link}
.endif .endif
+.if defined(_MAP_DEBUG_PREFIX)
+.if ${_link} == "machine"
+CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include
+.else
+CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include
+.endif
+.endif
.endfor .endfor
+.if defined(_MAP_DEBUG_PREFIX)
+# Ensure that DWARF info contains a full path for auto-generated headers.
+CFLAGS+= -fdebug-prefix-map=.=${.OBJDIR}
+.endif
+
.NOPATH: ${_ILINKS} .NOPATH: ${_ILINKS}
${_ILINKS}: ${_ILINKS}:
sys/conf/options
@@ -253,7 +253,7 @@ CD9660 opt_dontuse.h
EXT2FS opt_dontuse.h EXT2FS opt_dontuse.h
FDESCFS opt_dontuse.h FDESCFS opt_dontuse.h
FFS opt_dontuse.h FFS opt_dontuse.h
-FUSE opt_dontuse.h+FUSEFS opt_dontuse.h
MSDOSFS opt_dontuse.h MSDOSFS opt_dontuse.h
NANDFS opt_dontuse.h NANDFS opt_dontuse.h
NULLFS opt_dontuse.h NULLFS opt_dontuse.h
sys/contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c
@@ -1217,6 +1217,7 @@ ar9300_ani_ar_poll(struct ath_hal *ah, const HAL_NODE_STATS *stats,
cck_phy_err_cnt = OS_REG_READ(ah, AR_PHY_ERR_2); cck_phy_err_cnt = OS_REG_READ(ah, AR_PHY_ERR_2);
/* Populate HAL_ANISTATS */ /* Populate HAL_ANISTATS */
+ /* XXX TODO: are these correct? */
if (ani_stats) { if (ani_stats) {
ani_stats->cckphyerr_cnt = ani_stats->cckphyerr_cnt =
cck_phy_err_cnt - ani_state->cck_phy_err_count; cck_phy_err_cnt - ani_state->cck_phy_err_count;
@@ -1257,18 +1258,32 @@ ar9300_ani_ar_poll(struct ath_hal *ah, const HAL_NODE_STATS *stats,
return; return;
} }
+ /*
+ * Calculate the OFDM/CCK phy error rate over the listen time interval.
	+ * This is used by the subsequent checks to see whether the OFDM/CCK
	+ * phy error rate is above or below the thresholds.
+ */
+
ofdm_phy_err_rate = ofdm_phy_err_rate =
ani_state->ofdm_phy_err_count * 1000 / ani_state->listen_time; ani_state->ofdm_phy_err_count * 1000 / ani_state->listen_time;
cck_phy_err_rate = cck_phy_err_rate =
ani_state->cck_phy_err_count * 1000 / ani_state->listen_time; ani_state->cck_phy_err_count * 1000 / ani_state->listen_time;
HALDEBUG(ah, HAL_DEBUG_ANI, HALDEBUG(ah, HAL_DEBUG_ANI,
- "%s: listen_time=%d OFDM:%d errs=%d/s CCK:%d errs=%d/s ofdm_turn=%d\n",+ "%s: listen_time=%d (total: %d) OFDM:%d errs=%d/s CCK:%d errs=%d/s ofdm_turn=%d\n",
__func__, listen_time, __func__, listen_time,
+ ani_state->listen_time,
ani_state->ofdm_noise_immunity_level, ofdm_phy_err_rate, ani_state->ofdm_noise_immunity_level, ofdm_phy_err_rate,
ani_state->cck_noise_immunity_level, cck_phy_err_rate, ani_state->cck_noise_immunity_level, cck_phy_err_rate,
ani_state->ofdms_turn); ani_state->ofdms_turn);
+ /*
+ * Check for temporary noise spurs. This is intended to be used by
+ * rate control to check if we should try higher packet rates or not.
+ * If the noise period is short enough then we shouldn't avoid trying
+ * higher rates but if the noise is high/sustained then it's likely
+ * not a great idea to try the higher MCS rates.
+ */
if (ani_state->listen_time >= HAL_NOISE_DETECT_PERIOD) { if (ani_state->listen_time >= HAL_NOISE_DETECT_PERIOD) {
old_phy_noise_spur = ani_state->phy_noise_spur; old_phy_noise_spur = ani_state->phy_noise_spur;
if (ofdm_phy_err_rate <= ani_state->ofdm_trig_low && if (ofdm_phy_err_rate <= ani_state->ofdm_trig_low &&
@@ -1281,7 +1296,7 @@ ar9300_ani_ar_poll(struct ath_hal *ah, const HAL_NODE_STATS *stats,
} }
if (old_phy_noise_spur != ani_state->phy_noise_spur) { if (old_phy_noise_spur != ani_state->phy_noise_spur) {
HALDEBUG(ah, HAL_DEBUG_ANI, HALDEBUG(ah, HAL_DEBUG_ANI,
- "%s: enviroment change from %d to %d\n",+ "%s: environment change from %d to %d\n",
__func__, old_phy_noise_spur, ani_state->phy_noise_spur); __func__, old_phy_noise_spur, ani_state->phy_noise_spur);
} }
} }
@@ -1304,6 +1319,10 @@ ar9300_ani_ar_poll(struct ath_hal *ah, const HAL_NODE_STATS *stats,
ar9300_ani_lower_immunity(ah); ar9300_ani_lower_immunity(ah);
ani_state->ofdms_turn = !ani_state->ofdms_turn; ani_state->ofdms_turn = !ani_state->ofdms_turn;
} }
+ /*
+ * Force an ANI restart regardless of whether the lower immunity
+ * level was met.
+ */
HALDEBUG(ah, HAL_DEBUG_ANI, HALDEBUG(ah, HAL_DEBUG_ANI,
"%s: 1 listen_time=%d ofdm=%d/s cck=%d/s - " "%s: 1 listen_time=%d ofdm=%d/s cck=%d/s - "
"calling ar9300_ani_restart\n", "calling ar9300_ani_restart\n",
@@ -1337,6 +1356,13 @@ ar9300_ani_ar_poll(struct ath_hal *ah, const HAL_NODE_STATS *stats,
ani_state->ofdms_turn = AH_TRUE; ani_state->ofdms_turn = AH_TRUE;
} }
} }
+
+ /*
+ * Note that currently this poll function doesn't reset the listen
	+ * time after it accumulates a second's worth of error samples.
	+ * It will continue to accumulate samples until a counter overflows,
	+ * a raise threshold is met, or 5 seconds pass.
+ */
} }
/* /*
sys/dev/ahci/ahci_pci.c
@@ -232,6 +232,8 @@ static const struct {
{0xa2828086, 0x00, "Intel Union Point", 0}, {0xa2828086, 0x00, "Intel Union Point", 0},
{0xa2868086, 0x00, "Intel Union Point (RAID)", 0}, {0xa2868086, 0x00, "Intel Union Point (RAID)", 0},
{0xa28e8086, 0x00, "Intel Union Point (RAID)", 0}, {0xa28e8086, 0x00, "Intel Union Point (RAID)", 0},
+ {0xa3528086, 0x00, "Intel Cannon Lake", 0},
+ {0xa3538086, 0x00, "Intel Cannon Lake", 0},
{0x23238086, 0x00, "Intel DH89xxCC", 0}, {0x23238086, 0x00, "Intel DH89xxCC", 0},
{0x2360197b, 0x00, "JMicron JMB360", 0}, {0x2360197b, 0x00, "JMicron JMB360", 0},
{0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH}, {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH},
sys/dev/cpufreq/cpufreq_dt.c
@@ -166,7 +166,7 @@ cpufreq_dt_set(device_t dev, const struct cf_setting *set)
struct cpufreq_dt_softc *sc; struct cpufreq_dt_softc *sc;
const struct cpufreq_dt_opp *opp, *copp; const struct cpufreq_dt_opp *opp, *copp;
uint64_t freq; uint64_t freq;
- int error = 0;+ int uvolt, error;
sc = device_get_softc(dev); sc = device_get_softc(dev);
@@ -174,23 +174,38 @@ cpufreq_dt_set(device_t dev, const struct cf_setting *set)
device_printf(dev, "Can't get current clk freq\n"); device_printf(dev, "Can't get current clk freq\n");
return (ENXIO); return (ENXIO);
} }
	+ /* Try to get the current voltage by using the regulator first. */
+ error = regulator_get_voltage(sc->reg, &uvolt);
+ if (error != 0) {
	+ /*
	+ * Try the operating-points (OPP) table as a backup. However,
	+ * this is insufficient because the actual processor
	+ * frequency may not be in the table; the PLL frequency
	+ * granularity can differ from the granularity of the
	+ * OPP table.
	+ */
+ copp = cpufreq_dt_find_opp(sc->dev, freq);
+ if (copp == NULL) {
+ device_printf(dev,
+ "Can't find the current freq in opp\n");
+ return (ENOENT);
+ }
+ uvolt = copp->uvolt_target;
- DEBUG(sc->dev, "Current freq %ju\n", freq);
- DEBUG(sc->dev, "Target freq %ju\n", (uint64_t)set->freq * 1000000);
- copp = cpufreq_dt_find_opp(sc->dev, freq);
- if (copp == NULL) {
- device_printf(dev, "Can't find the current freq in opp\n");
- return (ENOENT);
} }
+
opp = cpufreq_dt_find_opp(sc->dev, set->freq * 1000000); opp = cpufreq_dt_find_opp(sc->dev, set->freq * 1000000);
if (opp == NULL) { if (opp == NULL) {
device_printf(dev, "Couldn't find an opp for this freq\n"); device_printf(dev, "Couldn't find an opp for this freq\n");
return (EINVAL); return (EINVAL);
} }
+ DEBUG(sc->dev, "Current freq %ju, uvolt: %d\n", freq, uvolt);
+ DEBUG(sc->dev, "Target freq %ju, , uvolt: %d\n",
+ opp->freq, opp->uvolt_target);
- if (copp->uvolt_target < opp->uvolt_target) {+ if (uvolt < opp->uvolt_target) {
DEBUG(dev, "Changing regulator from %u to %u\n", DEBUG(dev, "Changing regulator from %u to %u\n",
- copp->uvolt_target, opp->uvolt_target);+ uvolt, opp->uvolt_target);
error = regulator_set_voltage(sc->reg, error = regulator_set_voltage(sc->reg,
opp->uvolt_min, opp->uvolt_min,
opp->uvolt_max); opp->uvolt_max);
@@ -201,7 +216,7 @@ cpufreq_dt_set(device_t dev, const struct cf_setting *set)
} }
DEBUG(dev, "Setting clk to %ju\n", opp->freq); DEBUG(dev, "Setting clk to %ju\n", opp->freq);
- error = clk_set_freq(sc->clk, opp->freq, 0);+ error = clk_set_freq(sc->clk, opp->freq, CLK_SET_ROUND_DOWN);
if (error != 0) { if (error != 0) {
DEBUG(dev, "Failed, backout\n"); DEBUG(dev, "Failed, backout\n");
/* Restore previous voltage (best effort) */ /* Restore previous voltage (best effort) */
@@ -211,7 +226,9 @@ cpufreq_dt_set(device_t dev, const struct cf_setting *set)
return (ENXIO); return (ENXIO);
} }
- if (copp->uvolt_target > opp->uvolt_target) {+ if (uvolt > opp->uvolt_target) {
+ DEBUG(dev, "Changing regulator from %u to %u\n",
+ uvolt, opp->uvolt_target);
error = regulator_set_voltage(sc->reg, error = regulator_set_voltage(sc->reg,
opp->uvolt_min, opp->uvolt_min,
opp->uvolt_max); opp->uvolt_max);
@@ -219,8 +236,7 @@ cpufreq_dt_set(device_t dev, const struct cf_setting *set)
DEBUG(dev, "Failed to switch regulator to %d\n", DEBUG(dev, "Failed to switch regulator to %d\n",
opp->uvolt_target); opp->uvolt_target);
/* Restore previous CPU frequency (best effort) */ /* Restore previous CPU frequency (best effort) */
- (void)clk_set_freq(sc->clk,+ (void)clk_set_freq(sc->clk, copp->freq, 0);
- copp->freq, 0);
return (ENXIO); return (ENXIO);
} }
} }
@@ -277,7 +293,8 @@ cpufreq_dt_identify(driver_t *driver, device_t parent)
/* The cpu@0 node must have the following properties */ /* The cpu@0 node must have the following properties */
if (!OF_hasprop(node, "clocks") || if (!OF_hasprop(node, "clocks") ||
- !OF_hasprop(node, "cpu-supply"))+ (!OF_hasprop(node, "cpu-supply") &&
+ !OF_hasprop(node, "cpu0-supply")))
return; return;
if (!OF_hasprop(node, "operating-points") && if (!OF_hasprop(node, "operating-points") &&
@@ -299,7 +316,9 @@ cpufreq_dt_probe(device_t dev)
node = ofw_bus_get_node(device_get_parent(dev)); node = ofw_bus_get_node(device_get_parent(dev));
if (!OF_hasprop(node, "clocks") || if (!OF_hasprop(node, "clocks") ||
- !OF_hasprop(node, "cpu-supply"))+ (!OF_hasprop(node, "cpu-supply") &&
+ !OF_hasprop(node, "cpu0-supply")))
+
return (ENXIO); return (ENXIO);
if (!OF_hasprop(node, "operating-points") && if (!OF_hasprop(node, "operating-points") &&
@@ -439,9 +458,12 @@ cpufreq_dt_attach(device_t dev)
if (regulator_get_by_ofw_property(dev, node, if (regulator_get_by_ofw_property(dev, node,
"cpu-supply", &sc->reg) != 0) { "cpu-supply", &sc->reg) != 0) {
- device_printf(dev, "no regulator for %s\n",+ if (regulator_get_by_ofw_property(dev, node,
- ofw_bus_get_name(device_get_parent(dev)));+ "cpu0-supply", &sc->reg) != 0) {
- return (ENXIO);+ device_printf(dev, "no regulator for %s\n",
+ ofw_bus_get_name(device_get_parent(dev)));
+ return (ENXIO);
+ }
} }
if (clk_get_by_ofw_index(dev, node, 0, &sc->clk) != 0) { if (clk_get_by_ofw_index(dev, node, 0, &sc->clk) != 0) {
sys/dev/cxgbe/adapter.h
@@ -194,8 +194,10 @@ struct vi_info {
int if_flags; int if_flags;
uint16_t *rss, *nm_rss; uint16_t *rss, *nm_rss;
- int smt_idx; /* for convenience */+ uint16_t viid; /* opaque VI identifier */
- uint16_t viid;+ uint16_t smt_idx;
+ uint16_t vin;
+ uint8_t vfvld;
int16_t xact_addr_filt;/* index of exact MAC address filter */ int16_t xact_addr_filt;/* index of exact MAC address filter */
uint16_t rss_size; /* size of VI's RSS table slice */ uint16_t rss_size; /* size of VI's RSS table slice */
uint16_t rss_base; /* start of VI's RSS table slice */ uint16_t rss_base; /* start of VI's RSS table slice */
sys/dev/cxgbe/common/common.h
@@ -375,8 +375,9 @@ struct adapter_params {
uint32_t mps_bg_map; /* rx buffer group map for all ports (upto 4) */ uint32_t mps_bg_map; /* rx buffer group map for all ports (upto 4) */
- bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */+ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */
- bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */+ bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */
+ bool viid_smt_extn_support; /* FW returns vin, vfvld & smt index? */
}; };
#define CHELSIO_T4 0x4 #define CHELSIO_T4 0x4
@@ -756,10 +757,11 @@ int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf,
int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox, int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
unsigned int port, unsigned int pf, unsigned int vf, unsigned int port, unsigned int pf, unsigned int vf,
unsigned int nmac, u8 *mac, u16 *rss_size, unsigned int nmac, u8 *mac, u16 *rss_size,
+ uint8_t *vfvld, uint16_t *vin,
unsigned int portfunc, unsigned int idstype); unsigned int portfunc, unsigned int idstype);
int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
- u16 *rss_size);+ u16 *rss_size, uint8_t *vfvld, uint16_t *vin);
int t4_free_vi(struct adapter *adap, unsigned int mbox, int t4_free_vi(struct adapter *adap, unsigned int mbox,
unsigned int pf, unsigned int vf, unsigned int pf, unsigned int vf,
unsigned int viid); unsigned int viid);
@@ -770,7 +772,7 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid
bool free, unsigned int naddr, const u8 **addr, u16 *idx, bool free, unsigned int naddr, const u8 **addr, u16 *idx,
u64 *hash, bool sleep_ok); u64 *hash, bool sleep_ok);
int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
- int idx, const u8 *addr, bool persist, bool add_smt);+ int idx, const u8 *addr, bool persist, uint16_t *smt_idx);
int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid,
bool ucast, u64 vec, bool sleep_ok); bool ucast, u64 vec, bool sleep_ok);
int t4_enable_vi_params(struct adapter *adap, unsigned int mbox, int t4_enable_vi_params(struct adapter *adap, unsigned int mbox,
sys/dev/cxgbe/common/t4_hw.c
@@ -7794,6 +7794,7 @@ int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf,
int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox, int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
unsigned int port, unsigned int pf, unsigned int vf, unsigned int port, unsigned int pf, unsigned int vf,
unsigned int nmac, u8 *mac, u16 *rss_size, unsigned int nmac, u8 *mac, u16 *rss_size,
+ uint8_t *vfvld, uint16_t *vin,
unsigned int portfunc, unsigned int idstype) unsigned int portfunc, unsigned int idstype)
{ {
int ret; int ret;
@@ -7814,6 +7815,7 @@ int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
if (ret) if (ret)
return ret; return ret;
+ ret = G_FW_VI_CMD_VIID(be16_to_cpu(c.type_to_viid));
if (mac) { if (mac) {
memcpy(mac, c.mac, sizeof(c.mac)); memcpy(mac, c.mac, sizeof(c.mac));
@@ -7830,7 +7832,18 @@ int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
} }
if (rss_size) if (rss_size)
*rss_size = G_FW_VI_CMD_RSSSIZE(be16_to_cpu(c.norss_rsssize)); *rss_size = G_FW_VI_CMD_RSSSIZE(be16_to_cpu(c.norss_rsssize));
- return G_FW_VI_CMD_VIID(be16_to_cpu(c.type_to_viid));+ if (vfvld) {
+ *vfvld = adap->params.viid_smt_extn_support ?
+ G_FW_VI_CMD_VFVLD(be32_to_cpu(c.alloc_to_len16)) :
+ G_FW_VIID_VIVLD(ret);
+ }
+ if (vin) {
+ *vin = adap->params.viid_smt_extn_support ?
+ G_FW_VI_CMD_VIN(be32_to_cpu(c.alloc_to_len16)) :
+ G_FW_VIID_VIN(ret);
+ }
+
+ return ret;
} }
/** /**
@@ -7850,10 +7863,10 @@ int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox,
*/ */
int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
- u16 *rss_size)+ u16 *rss_size, uint8_t *vfvld, uint16_t *vin)
{ {
return t4_alloc_vi_func(adap, mbox, port, pf, vf, nmac, mac, rss_size, return t4_alloc_vi_func(adap, mbox, port, pf, vf, nmac, mac, rss_size,
- FW_VI_FUNC_ETH, 0);+ vfvld, vin, FW_VI_FUNC_ETH, 0);
} }
/** /**
@@ -8030,7 +8043,7 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
* @idx: index of existing filter for old value of MAC address, or -1 * @idx: index of existing filter for old value of MAC address, or -1
* @addr: the new MAC address value * @addr: the new MAC address value
* @persist: whether a new MAC allocation should be persistent * @persist: whether a new MAC allocation should be persistent
- * @add_smt: if true also add the address to the HW SMT+ * @smt_idx: add MAC to SMT and return its index, or NULL
* *
* Modifies an exact-match filter and sets it to the new MAC address if * Modifies an exact-match filter and sets it to the new MAC address if
* @idx >= 0, or adds the MAC address to a new filter if @idx < 0. In the * @idx >= 0, or adds the MAC address to a new filter if @idx < 0. In the
@@ -8045,7 +8058,7 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
* MAC value. Note that this index may differ from @idx. * MAC value. Note that this index may differ from @idx.
*/ */
int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
- int idx, const u8 *addr, bool persist, bool add_smt)+ int idx, const u8 *addr, bool persist, uint16_t *smt_idx)
{ {
int ret, mode; int ret, mode;
struct fw_vi_mac_cmd c; struct fw_vi_mac_cmd c;
@@ -8054,7 +8067,7 @@ int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
if (idx < 0) /* new allocation */ if (idx < 0) /* new allocation */
idx = persist ? FW_VI_MAC_ADD_PERSIST_MAC : FW_VI_MAC_ADD_MAC; idx = persist ? FW_VI_MAC_ADD_PERSIST_MAC : FW_VI_MAC_ADD_MAC;
- mode = add_smt ? FW_VI_MAC_SMT_AND_MPSTCAM : FW_VI_MAC_MPS_TCAM_ENTRY;+ mode = smt_idx ? FW_VI_MAC_SMT_AND_MPSTCAM : FW_VI_MAC_MPS_TCAM_ENTRY;
memset(&c, 0, sizeof(c)); memset(&c, 0, sizeof(c));
c.op_to_viid = cpu_to_be32(V_FW_CMD_OP(FW_VI_MAC_CMD) | c.op_to_viid = cpu_to_be32(V_FW_CMD_OP(FW_VI_MAC_CMD) |
@@ -8071,6 +8084,16 @@ int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
ret = G_FW_VI_MAC_CMD_IDX(be16_to_cpu(p->valid_to_idx)); ret = G_FW_VI_MAC_CMD_IDX(be16_to_cpu(p->valid_to_idx));
if (ret >= max_mac_addr) if (ret >= max_mac_addr)
ret = -ENOMEM; ret = -ENOMEM;
+ if (smt_idx) {
+ if (adap->params.viid_smt_extn_support)
+ *smt_idx = G_FW_VI_MAC_CMD_SMTID(be32_to_cpu(c.op_to_viid));
+ else {
+ if (chip_id(adap) <= CHELSIO_T5)
+ *smt_idx = (viid & M_FW_VIID_VIN) << 1;
+ else
+ *smt_idx = viid & M_FW_VIID_VIN;
+ }
+ }
} }
return ret; return ret;
} }
@@ -9331,9 +9354,9 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id)
{ {
u8 addr[6]; u8 addr[6];
int ret, i, j; int ret, i, j;
- u16 rss_size;
struct port_info *p = adap2pinfo(adap, port_id); struct port_info *p = adap2pinfo(adap, port_id);
u32 param, val; u32 param, val;
+ struct vi_info *vi = &p->vi[0];
for (i = 0, j = -1; i <= p->port_id; i++) { for (i = 0, j = -1; i <= p->port_id; i++) {
do { do {
@@ -9351,27 +9374,23 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id)
t4_update_port_info(p); t4_update_port_info(p);
} }
- ret = t4_alloc_vi(adap, mbox, j, pf, vf, 1, addr, &rss_size);+ ret = t4_alloc_vi(adap, mbox, j, pf, vf, 1, addr, &vi->rss_size,
+ &vi->vfvld, &vi->vin);
if (ret < 0) if (ret < 0)
return ret; return ret;
- p->vi[0].viid = ret;+ vi->viid = ret;
- if (chip_id(adap) <= CHELSIO_T5)
- p->vi[0].smt_idx = (ret & 0x7f) << 1;
- else
- p->vi[0].smt_idx = (ret & 0x7f);
- p->vi[0].rss_size = rss_size;
t4_os_set_hw_addr(p, addr); t4_os_set_hw_addr(p, addr);
param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
- V_FW_PARAMS_PARAM_YZ(p->vi[0].viid);+ V_FW_PARAMS_PARAM_YZ(vi->viid);
ret = t4_query_params(adap, mbox, pf, vf, 1, &param, &val); ret = t4_query_params(adap, mbox, pf, vf, 1, &param, &val);
if (ret) if (ret)
- p->vi[0].rss_base = 0xffff;+ vi->rss_base = 0xffff;
else { else {
/* MPASS((val >> 16) == rss_size); */ /* MPASS((val >> 16) == rss_size); */
- p->vi[0].rss_base = val & 0xffff;+ vi->rss_base = val & 0xffff;
} }
return 0; return 0;
sys/dev/cxgbe/firmware/t4fw_interface.h
@@ -4798,6 +4798,7 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
FW_PARAMS_PARAM_DEV_ADD_SMAC = 0x25, FW_PARAMS_PARAM_DEV_ADD_SMAC = 0x25,
FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26, FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26,
+ FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
}; };
/* /*
@@ -6502,6 +6503,19 @@ struct fw_vi_cmd {
(((x) >> S_FW_VI_CMD_FREE) & M_FW_VI_CMD_FREE) (((x) >> S_FW_VI_CMD_FREE) & M_FW_VI_CMD_FREE)
#define F_FW_VI_CMD_FREE V_FW_VI_CMD_FREE(1U) #define F_FW_VI_CMD_FREE V_FW_VI_CMD_FREE(1U)
+#define S_FW_VI_CMD_VFVLD 24
+#define M_FW_VI_CMD_VFVLD 0x1
+#define V_FW_VI_CMD_VFVLD(x) ((x) << S_FW_VI_CMD_VFVLD)
+#define G_FW_VI_CMD_VFVLD(x) \
+ (((x) >> S_FW_VI_CMD_VFVLD) & M_FW_VI_CMD_VFVLD)
+#define F_FW_VI_CMD_VFVLD V_FW_VI_CMD_VFVLD(1U)
+
+#define S_FW_VI_CMD_VIN 16
+#define M_FW_VI_CMD_VIN 0xff
+#define V_FW_VI_CMD_VIN(x) ((x) << S_FW_VI_CMD_VIN)
+#define G_FW_VI_CMD_VIN(x) \
+ (((x) >> S_FW_VI_CMD_VIN) & M_FW_VI_CMD_VIN)
+
#define S_FW_VI_CMD_TYPE 15 #define S_FW_VI_CMD_TYPE 15
#define M_FW_VI_CMD_TYPE 0x1 #define M_FW_VI_CMD_TYPE 0x1
#define V_FW_VI_CMD_TYPE(x) ((x) << S_FW_VI_CMD_TYPE) #define V_FW_VI_CMD_TYPE(x) ((x) << S_FW_VI_CMD_TYPE)
@@ -6608,6 +6622,12 @@ struct fw_vi_mac_cmd {
} u; } u;
}; };
+#define S_FW_VI_MAC_CMD_SMTID 12
+#define M_FW_VI_MAC_CMD_SMTID 0xff
+#define V_FW_VI_MAC_CMD_SMTID(x) ((x) << S_FW_VI_MAC_CMD_SMTID)
+#define G_FW_VI_MAC_CMD_SMTID(x) \
+ (((x) >> S_FW_VI_MAC_CMD_SMTID) & M_FW_VI_MAC_CMD_SMTID)
+
#define S_FW_VI_MAC_CMD_VIID 0 #define S_FW_VI_MAC_CMD_VIID 0
#define M_FW_VI_MAC_CMD_VIID 0xfff #define M_FW_VI_MAC_CMD_VIID 0xfff
#define V_FW_VI_MAC_CMD_VIID(x) ((x) << S_FW_VI_MAC_CMD_VIID) #define V_FW_VI_MAC_CMD_VIID(x) ((x) << S_FW_VI_MAC_CMD_VIID)
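The new VFVLD/VIN/SMTID definitions above follow the driver's usual S_/M_/V_/G_ shift-and-mask convention for firmware command fields. As a minimal, self-contained sketch (reusing only the constants shown in this hunk; nothing else is taken from the tree), packing and extracting the 8-bit VIN field works like this:

#include <stdint.h>
#include <stdio.h>

#define S_FW_VI_CMD_VIN		16
#define M_FW_VI_CMD_VIN		0xff
#define V_FW_VI_CMD_VIN(x)	((x) << S_FW_VI_CMD_VIN)	/* insert field */
#define G_FW_VI_CMD_VIN(x)	(((x) >> S_FW_VI_CMD_VIN) & M_FW_VI_CMD_VIN)	/* extract field */

int
main(void)
{
	uint32_t word = 0;

	/* Place VIN 0x2a into bits 23:16 of the 32-bit command word. */
	word |= V_FW_VI_CMD_VIN(0x2a);

	/* Pull it back out, as t4_alloc_vi_func() does for c.alloc_to_len16. */
	printf("vin=0x%02x\n", G_FW_VI_CMD_VIN(word));
	return (0);
}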
sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -859,7 +859,6 @@ struct c4iw_ep {
unsigned int mpa_pkt_len; unsigned int mpa_pkt_len;
u32 ird; u32 ird;
u32 ord; u32 ord;
- u32 smac_idx;
u32 tx_chan; u32 tx_chan;
u32 mtu; u32 mtu;
u16 mss; u16 mss;
sys/dev/cxgbe/t4_main.c
@@ -2486,17 +2486,13 @@ alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
device_get_nameunit(vi->dev))); device_get_nameunit(vi->dev)));
func = vi_mac_funcs[index]; func = vi_mac_funcs[index];
rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
- vi->hw_addr, &vi->rss_size, func, 0);+ vi->hw_addr, &vi->rss_size, &vi->vfvld, &vi->vin, func, 0);
if (rc < 0) { if (rc < 0) {
device_printf(vi->dev, "failed to allocate virtual interface %d" device_printf(vi->dev, "failed to allocate virtual interface %d"
"for port %d: %d\n", index, pi->port_id, -rc); "for port %d: %d\n", index, pi->port_id, -rc);
return (-rc); return (-rc);
} }
vi->viid = rc; vi->viid = rc;
- if (chip_id(sc) <= CHELSIO_T5)
- vi->smt_idx = (rc & 0x7f) << 1;
- else
- vi->smt_idx = (rc & 0x7f);
if (vi->rss_size == 1) { if (vi->rss_size == 1) {
/* /*
@@ -4113,6 +4109,15 @@ set_params__pre_init(struct adapter *sc)
} }
} }
+ /* Enable opaque VIIDs with firmwares that support it. */
+ param = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
+ val = 1;
+ rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+ if (rc == 0 && val == 1)
+ sc->params.viid_smt_extn_support = true;
+ else
+ sc->params.viid_smt_extn_support = false;
+
return (rc); return (rc);
} }
@@ -4825,7 +4830,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
- ucaddr, true, true);+ ucaddr, true, &vi->smt_idx);
if (rc < 0) { if (rc < 0) {
rc = -rc; rc = -rc;
if_printf(ifp, "change_mac failed: %d\n", rc); if_printf(ifp, "change_mac failed: %d\n", rc);
@@ -5746,7 +5751,7 @@ get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
#define A_PL_INDIR_DATA 0x1fc #define A_PL_INDIR_DATA 0x1fc
static uint64_t static uint64_t
-read_vf_stat(struct adapter *sc, unsigned int viid, int reg)+read_vf_stat(struct adapter *sc, u_int vin, int reg)
{ {
u32 stats[2]; u32 stats[2];
@@ -5756,8 +5761,7 @@ read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4)); stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
} else { } else {
t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
- V_PL_VFID(G_FW_VIID_VIN(viid)) |+ V_PL_VFID(vin) | V_PL_ADDR(VF_MPS_REG(reg)));
- V_PL_ADDR(VF_MPS_REG(reg)));
stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
} }
@@ -5765,12 +5769,11 @@ read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
} }
static void static void
-t4_get_vi_stats(struct adapter *sc, unsigned int viid,+t4_get_vi_stats(struct adapter *sc, u_int vin, struct fw_vi_stats_vf *stats)
- struct fw_vi_stats_vf *stats)
{ {
#define GET_STAT(name) \ #define GET_STAT(name) \
- read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)+ read_vf_stat(sc, vin, A_MPS_VF_STAT_##name##_L)
stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES);
stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES);
@@ -5793,12 +5796,11 @@ t4_get_vi_stats(struct adapter *sc, unsigned int viid,
} }
static void static void
-t4_clr_vi_stats(struct adapter *sc, unsigned int viid)+t4_clr_vi_stats(struct adapter *sc, u_int vin)
{ {
int reg; int reg;
- t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |+ t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(vin) |
- V_PL_VFID(G_FW_VIID_VIN(viid)) |
V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
@@ -5820,7 +5822,7 @@ vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
return; return;
mtx_lock(&sc->reg_lock); mtx_lock(&sc->reg_lock);
- t4_get_vi_stats(sc, vi->viid, &vi->stats);+ t4_get_vi_stats(sc, vi->vin, &vi->stats);
getmicrotime(&vi->last_refreshed); getmicrotime(&vi->last_refreshed);
mtx_unlock(&sc->reg_lock); mtx_unlock(&sc->reg_lock);
} }
@@ -10055,7 +10057,7 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
mtx_lock(&sc->reg_lock); mtx_lock(&sc->reg_lock);
for_each_vi(pi, v, vi) { for_each_vi(pi, v, vi) {
if (vi->flags & VI_INIT_DONE) if (vi->flags & VI_INIT_DONE)
- t4_clr_vi_stats(sc, vi->viid);+ t4_clr_vi_stats(sc, vi->vin);
} }
bg_map = pi->mps_bg_map; bg_map = pi->mps_bg_map;
v = 0; /* reuse */ v = 0; /* reuse */
sys/dev/cxgbe/t4_sched.c
@@ -799,9 +799,8 @@ failed:
cst->tx_total = cst->tx_credits; cst->tx_total = cst->tx_credits;
cst->plen = 0; cst->plen = 0;
cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
- V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) |+ V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
- V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) |+ V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
- V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid)));
/* /*
* Queues will be selected later when the connection flowid is available. * Queues will be selected later when the connection flowid is available.
sys/dev/cxgbe/t4_sge.c
@@ -3640,9 +3640,8 @@ alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
nm_txq->nid = idx; nm_txq->nid = idx;
nm_txq->iqidx = iqidx; nm_txq->iqidx = iqidx;
nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
- V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) |+ V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
- V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) |+ V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
- V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid)));
nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID;
snprintf(name, sizeof(name), "%d", idx); snprintf(name, sizeof(name), "%d", idx);
@@ -4043,10 +4042,8 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
V_TXPKT_INTF(pi->tx_chan)); V_TXPKT_INTF(pi->tx_chan));
else else
txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
- V_TXPKT_INTF(pi->tx_chan) |+ V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
- V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) |+ V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
- V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) |
- V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid)));
txq->tc_idx = -1; txq->tc_idx = -1;
txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE,
M_ZERO | M_WAITOK); M_ZERO | M_WAITOK);
@@ -5657,7 +5654,7 @@ send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi,
struct vi_info *vi) struct vi_info *vi)
{ {
struct wrq_cookie cookie; struct wrq_cookie cookie;
- u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;+ u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN;
struct fw_flowc_wr *flowc; struct fw_flowc_wr *flowc;
mtx_assert(&cst->lock, MA_OWNED); mtx_assert(&cst->lock, MA_OWNED);
sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -107,7 +107,7 @@ send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
struct vi_info *vi = toep->vi; struct vi_info *vi = toep->vi;
struct port_info *pi = vi->pi; struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter; struct adapter *sc = pi->adapter;
- unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;+ unsigned int pfvf = sc->pf << S_FW_VIID_PFN;
struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
sys/dev/cxgbe/tom/t4_listen.c
@@ -356,7 +356,7 @@ send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
struct sge_wrq *ofld_txq; struct sge_wrq *ofld_txq;
struct sge_ofld_rxq *ofld_rxq; struct sge_ofld_rxq *ofld_rxq;
const int nparams = 6; const int nparams = 6;
- unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;+ const u_int pfvf = sc->pf << S_FW_VIID_PFN;
INP_WLOCK_ASSERT(synqe->lctx->inp); INP_WLOCK_ASSERT(synqe->lctx->inp);
sys/dev/cxgbe/tom/t4_tom.c
@@ -633,7 +633,6 @@ select_ntuple(struct vi_info *vi, struct l2t_entry *e)
{ {
struct adapter *sc = vi->pi->adapter; struct adapter *sc = vi->pi->adapter;
struct tp_params *tp = &sc->params.tp; struct tp_params *tp = &sc->params.tp;
- uint16_t viid = vi->viid;
uint64_t ntuple = 0; uint64_t ntuple = 0;
/* /*
@@ -650,12 +649,9 @@ select_ntuple(struct vi_info *vi, struct l2t_entry *e)
ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;
if (tp->vnic_shift >= 0 && tp->ingress_config & F_VNIC) { if (tp->vnic_shift >= 0 && tp->ingress_config & F_VNIC) {
- uint32_t vf = G_FW_VIID_VIN(viid);+ ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) |
- uint32_t pf = G_FW_VIID_PFN(viid);+ V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) <<
- uint32_t vld = G_FW_VIID_VIVLD(viid);+ tp->vnic_shift;
-
- ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) |
- V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift;
} }
if (is_t4(sc)) if (is_t4(sc))
sys/dev/e1000/if_em.c
@@ -1270,14 +1270,7 @@ em_if_init(if_ctx_t ctx)
/* Setup Multicast table */ /* Setup Multicast table */
em_if_multi_set(ctx); em_if_multi_set(ctx);
- /*+ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
- * Figure out the desired mbuf
- * pool for doing jumbos
- */
- if (adapter->hw.mac.max_frame_size <= 2048)
- adapter->rx_mbuf_sz = MCLBYTES;
- else
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
em_initialize_receive_unit(ctx); em_initialize_receive_unit(ctx);
/* Use real VLAN Filter support? */ /* Use real VLAN Filter support? */
sys/dev/extres/phy/phy.c
@@ -517,7 +517,7 @@ phy_get_by_ofw_property(device_t consumer_dev, phandle_t cnode, char *name,
ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(pcell_t), ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(pcell_t),
(void **)&cells); (void **)&cells);
if (ncells < 1) if (ncells < 1)
- return (ENXIO);+ return (ENOENT);
/* Tranlate provider to device. */ /* Tranlate provider to device. */
phydev = OF_device_from_xref(cells[0]); phydev = OF_device_from_xref(cells[0]);
sys/dev/extres/regulator/regulator.c
@@ -1091,7 +1091,7 @@ regulator_get_by_ofw_property(device_t cdev, phandle_t cnode, char *name,
ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(*cells), ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(*cells),
(void **)&cells); (void **)&cells);
if (ncells <= 0) if (ncells <= 0)
- return (ENXIO);+ return (ENOENT);
/* Translate xref to device */ /* Translate xref to device */
regdev = OF_device_from_xref(cells[0]); regdev = OF_device_from_xref(cells[0]);
sys/dev/extres/syscon/syscon.c
@@ -238,7 +238,7 @@ syscon_get_by_ofw_property(device_t cdev, phandle_t cnode, char *name,
ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(pcell_t), ncells = OF_getencprop_alloc_multi(cnode, name, sizeof(pcell_t),
(void **)&cells); (void **)&cells);
if (ncells < 1) if (ncells < 1)
- return (ENXIO);+ return (ENOENT);
/* Translate to syscon node. */ /* Translate to syscon node. */
SYSCON_TOPO_SLOCK(); SYSCON_TOPO_SLOCK();
sys/dev/ichsmb/ichsmb_pci.c
@@ -104,6 +104,7 @@ __FBSDID("$FreeBSD$");
#define ID_LEWISBURG 0xa1a3 #define ID_LEWISBURG 0xa1a3
#define ID_LEWISBURG2 0xa223 #define ID_LEWISBURG2 0xa223
#define ID_KABYLAKE 0xa2a3 #define ID_KABYLAKE 0xa2a3
+#define ID_CANNONLAKE 0xa323
static const struct ichsmb_device { static const struct ichsmb_device {
uint16_t id; uint16_t id;
@@ -148,6 +149,7 @@ static const struct ichsmb_device {
{ ID_LEWISBURG, "Intel Lewisburg SMBus controller" }, { ID_LEWISBURG, "Intel Lewisburg SMBus controller" },
{ ID_LEWISBURG2,"Intel Lewisburg SMBus controller" }, { ID_LEWISBURG2,"Intel Lewisburg SMBus controller" },
{ ID_KABYLAKE, "Intel Kaby Lake SMBus controller" }, { ID_KABYLAKE, "Intel Kaby Lake SMBus controller" },
+ { ID_CANNONLAKE,"Intel Cannon Lake SMBus controller" },
{ 0, NULL }, { 0, NULL },
}; };
sys/dev/ixgbe/if_ix.c
@@ -2880,10 +2880,7 @@ ixgbe_if_init(if_ctx_t ctx)
ixgbe_if_multi_set(ctx); ixgbe_if_multi_set(ctx);
/* Determine the correct mbuf pool, based on frame size */ /* Determine the correct mbuf pool, based on frame size */
- if (adapter->max_frame_size <= MCLBYTES)+ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
- adapter->rx_mbuf_sz = MCLBYTES;
- else
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
/* Configure RX settings */ /* Configure RX settings */
ixgbe_initialize_receive_units(ctx); ixgbe_initialize_receive_units(ctx);
sys/dev/ixgbe/if_ixv.c
@@ -220,7 +220,7 @@ static struct if_shared_ctx ixv_sctx_init = {
.isc_vendor_info = ixv_vendor_info_array, .isc_vendor_info = ixv_vendor_info_array,
.isc_driver_version = ixv_driver_version, .isc_driver_version = ixv_driver_version,
.isc_driver = &ixv_if_driver, .isc_driver = &ixv_if_driver,
- .isc_flags = IFLIB_TSO_INIT_IP,+ .isc_flags = IFLIB_IS_VF | IFLIB_TSO_INIT_IP,
.isc_nrxd_min = {MIN_RXD}, .isc_nrxd_min = {MIN_RXD},
.isc_ntxd_min = {MIN_TXD}, .isc_ntxd_min = {MIN_TXD},
@@ -629,14 +629,7 @@ ixv_if_init(if_ctx_t ctx)
/* Setup Multicast table */ /* Setup Multicast table */
ixv_if_multi_set(ctx); ixv_if_multi_set(ctx);
- /*+ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
- * Determine the correct mbuf pool
- * for doing jumbo/headersplit
- */
- if (ifp->if_mtu > ETHERMTU)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else
- adapter->rx_mbuf_sz = MCLBYTES;
/* Configure RX settings */ /* Configure RX settings */
ixv_initialize_receive_units(ctx); ixv_initialize_receive_units(ctx);
sys/dev/ixl/if_iavf.c
@@ -614,7 +614,6 @@ iavf_send_vc_msg(struct iavf_sc *sc, u32 op)
static void static void
iavf_init_queues(struct ixl_vsi *vsi) iavf_init_queues(struct ixl_vsi *vsi)
{ {
- if_softc_ctx_t scctx = vsi->shared;
struct ixl_tx_queue *tx_que = vsi->tx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues;
struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_rx_queue *rx_que = vsi->rx_queues;
struct rx_ring *rxr; struct rx_ring *rxr;
@@ -625,10 +624,7 @@ iavf_init_queues(struct ixl_vsi *vsi)
for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) {
rxr = &rx_que->rxr; rxr = &rx_que->rxr;
- if (scctx->isc_max_frame_size <= MCLBYTES)+ rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx);
- rxr->mbuf_sz = MCLBYTES;
- else
- rxr->mbuf_sz = MJUMPAGESIZE;
wr32(vsi->hw, rxr->tail, 0); wr32(vsi->hw, rxr->tail, 0);
} }
sys/dev/ixl/ixl_pf_main.c
@@ -1300,10 +1300,7 @@ ixl_initialize_vsi(struct ixl_vsi *vsi)
struct i40e_hmc_obj_rxq rctx; struct i40e_hmc_obj_rxq rctx;
/* Next setup the HMC RX Context */ /* Next setup the HMC RX Context */
- if (scctx->isc_max_frame_size <= MCLBYTES)+ rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx);
- rxr->mbuf_sz = MCLBYTES;
- else
- rxr->mbuf_sz = MJUMPAGESIZE;
u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len;
sys/dev/pci/pcivar.h
@@ -259,6 +259,13 @@ typedef struct {
extern uint32_t pci_numdevs; extern uint32_t pci_numdevs;
+/*
+ * The bitfield has to be stable and match the fields below (so that
+ * match_flag_vendor must be bit 0) so we have to do the endian dance. We can't
+ * use enums or #define constants because then the macros for subsetting matches
+ * wouldn't work. These tables are parsed by devmatch and others to connect
+ * modules with devices on the PCI bus.
+ */
struct pci_device_table { struct pci_device_table {
#if BYTE_ORDER == LITTLE_ENDIAN #if BYTE_ORDER == LITTLE_ENDIAN
uint16_t uint16_t
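The comment above is the whole reason for the BYTE_ORDER dance that follows: GCC and Clang allocate bit-fields starting from the least-significant bit on little-endian targets and from the most-significant bit on big-endian ones, so the declaration order has to flip for the vendor flag to land in bit 0 of the underlying 16-bit word either way. An illustrative, stand-alone sketch of that pattern (all names other than match_flag_vendor are invented for the example, not taken from pcivar.h):

#include <sys/endian.h>		/* BYTE_ORDER, LITTLE_ENDIAN */
#include <stdint.h>

struct example_match {
#if BYTE_ORDER == LITTLE_ENDIAN
	uint16_t
	    match_flag_vendor:1,	/* bit 0 on LE: declared first */
	    match_flag_device:1,	/* bit 1 (hypothetical field) */
	    match_flag_unused:14;
#else
	uint16_t
	    match_flag_unused:14,
	    match_flag_device:1,	/* bit 1 (hypothetical field) */
	    match_flag_vendor:1;	/* bit 0 on BE: declared last */
#endif
	uint16_t	vendor;		/* compared only when the flag is set */
};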
sys/dev/psci/psci.c
@@ -79,9 +79,9 @@ struct psci_softc {
}; };
#ifdef FDT #ifdef FDT
-static int psci_v0_1_init(device_t dev);+static int psci_v0_1_init(device_t dev, int default_version);
#endif #endif
-static int psci_v0_2_init(device_t dev);+static int psci_v0_2_init(device_t dev, int default_version);
struct psci_softc *psci_softc = NULL; struct psci_softc *psci_softc = NULL;
@@ -96,15 +96,35 @@ struct psci_softc *psci_softc = NULL;
#endif #endif
#ifdef FDT #ifdef FDT
+struct psci_init_def {
+ int default_version;
+ psci_initfn_t psci_init;
+};
+
+static struct psci_init_def psci_v1_0_init_def = {
+ .default_version = (1 << 16) | 0,
+ .psci_init = psci_v0_2_init
+};
+
+static struct psci_init_def psci_v0_2_init_def = {
+ .default_version = (0 << 16) | 2,
+ .psci_init = psci_v0_2_init
+};
+
+static struct psci_init_def psci_v0_1_init_def = {
+ .default_version = (0 << 16) | 1,
+ .psci_init = psci_v0_1_init
+};
+
static struct ofw_compat_data compat_data[] = { static struct ofw_compat_data compat_data[] = {
- {"arm,psci-1.0", (uintptr_t)psci_v0_2_init},+ {"arm,psci-1.0", (uintptr_t)&psci_v1_0_init_def},
- {"arm,psci-0.2", (uintptr_t)psci_v0_2_init},+ {"arm,psci-0.2", (uintptr_t)&psci_v0_2_init_def},
- {"arm,psci", (uintptr_t)psci_v0_1_init},+ {"arm,psci", (uintptr_t)&psci_v0_1_init_def},
{NULL, 0} {NULL, 0}
}; };
#endif #endif
-static int psci_attach(device_t, psci_initfn_t);+static int psci_attach(device_t, psci_initfn_t, int);
static void psci_shutdown(void *, int); static void psci_shutdown(void *, int);
static int psci_find_callfn(psci_callfn_t *); static int psci_find_callfn(psci_callfn_t *);
@@ -198,12 +218,13 @@ static int
psci_fdt_attach(device_t dev) psci_fdt_attach(device_t dev)
{ {
const struct ofw_compat_data *ocd; const struct ofw_compat_data *ocd;
- psci_initfn_t psci_init;+ struct psci_init_def *psci_init_def;
ocd = ofw_bus_search_compatible(dev, compat_data); ocd = ofw_bus_search_compatible(dev, compat_data);
- psci_init = (psci_initfn_t)ocd->ocd_data;+ psci_init_def = (struct psci_init_def *)ocd->ocd_data;
- return (psci_attach(dev, psci_init));+ return (psci_attach(dev, psci_init_def->psci_init,
+ psci_init_def->default_version));
} }
#endif #endif
@@ -304,12 +325,12 @@ static int
psci_acpi_attach(device_t dev) psci_acpi_attach(device_t dev)
{ {
- return (psci_attach(dev, psci_v0_2_init));+ return (psci_attach(dev, psci_v0_2_init, PSCI_RETVAL_NOT_SUPPORTED));
} }
#endif #endif
static int static int
-psci_attach(device_t dev, psci_initfn_t psci_init)+psci_attach(device_t dev, psci_initfn_t psci_init, int default_version)
{ {
struct psci_softc *sc = device_get_softc(dev); struct psci_softc *sc = device_get_softc(dev);
@@ -317,7 +338,7 @@ psci_attach(device_t dev, psci_initfn_t psci_init)
return (ENXIO); return (ENXIO);
KASSERT(psci_init != NULL, ("PSCI init function cannot be NULL")); KASSERT(psci_init != NULL, ("PSCI init function cannot be NULL"));
- if (psci_init(dev))+ if (psci_init(dev, default_version))
return (ENXIO); return (ENXIO);
psci_softc = sc; psci_softc = sc;
@@ -464,7 +485,7 @@ psci_reset(void)
#ifdef FDT #ifdef FDT
/* Only support PSCI 0.1 on FDT */ /* Only support PSCI 0.1 on FDT */
static int static int
-psci_v0_1_init(device_t dev)+psci_v0_1_init(device_t dev, int default_version __unused)
{ {
struct psci_softc *sc = device_get_softc(dev); struct psci_softc *sc = device_get_softc(dev);
int psci_fn; int psci_fn;
@@ -510,7 +531,7 @@ psci_v0_1_init(device_t dev)
#endif #endif
static int static int
-psci_v0_2_init(device_t dev)+psci_v0_2_init(device_t dev, int default_version)
{ {
struct psci_softc *sc = device_get_softc(dev); struct psci_softc *sc = device_get_softc(dev);
int version; int version;
@@ -529,8 +550,20 @@ psci_v0_2_init(device_t dev)
version = _psci_get_version(sc); version = _psci_get_version(sc);
- if (version == PSCI_RETVAL_NOT_SUPPORTED)+ /*
	- return (1);+ * The U-Boot PSCI implementation on many boards doesn't implement the
	+ * psci_get_version() method. In this case, use the version
	+ * read from the FDT as a fallback. There is no fallback method for ACPI.
+ */
+ if (version == PSCI_RETVAL_NOT_SUPPORTED) {
+ if (default_version == PSCI_RETVAL_NOT_SUPPORTED)
+ return (1);
+
+ version = default_version;
+ printf("PSCI get_version() function is not implemented, "
+ " assuming v%d.%d\n", PSCI_VER_MAJOR(version),
+ PSCI_VER_MINOR(version));
+ }
sc->psci_version = version; sc->psci_version = version;
if ((PSCI_VER_MAJOR(version) == 0 && PSCI_VER_MINOR(version) == 2) || if ((PSCI_VER_MAJOR(version) == 0 && PSCI_VER_MINOR(version) == 2) ||
sys/dev/psci/psci.h
@@ -31,7 +31,7 @@
#include <sys/types.h> #include <sys/types.h>
-typedef int (*psci_initfn_t)(device_t dev);+typedef int (*psci_initfn_t)(device_t dev, int default_version);
typedef int (*psci_callfn_t)(register_t, register_t, register_t, register_t); typedef int (*psci_callfn_t)(register_t, register_t, register_t, register_t);
extern int psci_present; extern int psci_present;
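A note on the default_version values used in the psci_init_def table above: PSCI reports its version with the major number in the upper 16 bits and the minor number in the lower 16 bits, which is what the (1 << 16) | 0 style initializers encode and what PSCI_VER_MAJOR()/PSCI_VER_MINOR() unpack. A tiny stand-alone sketch of that encoding (the EX_ macros below are illustrative stand-ins, not the tree's definitions):

#include <stdio.h>

#define EX_PSCI_VER_MAJOR(v)	(((v) >> 16) & 0xffff)
#define EX_PSCI_VER_MINOR(v)	((v) & 0xffff)

int
main(void)
{
	int version = (1 << 16) | 0;	/* the arm,psci-1.0 fallback value */

	/* Mirrors the "assuming vX.Y" message printed by psci_v0_2_init(). */
	printf("assuming v%d.%d\n",
	    EX_PSCI_VER_MAJOR(version), EX_PSCI_VER_MINOR(version));
	return (0);
}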
sys/dev/sound/pci/hda/hdac.c
@@ -97,11 +97,11 @@ static const struct {
{ HDA_INTEL_LPTLP1, "Intel Lynx Point-LP", 0, 0 }, { HDA_INTEL_LPTLP1, "Intel Lynx Point-LP", 0, 0 },
{ HDA_INTEL_LPTLP2, "Intel Lynx Point-LP", 0, 0 }, { HDA_INTEL_LPTLP2, "Intel Lynx Point-LP", 0, 0 },
{ HDA_INTEL_SRPTLP, "Intel Sunrise Point-LP", 0, 0 }, { HDA_INTEL_SRPTLP, "Intel Sunrise Point-LP", 0, 0 },
- { HDA_INTEL_KBLKLP, "Intel Kabylake-LP", 0, 0 },+ { HDA_INTEL_KBLKLP, "Intel Kaby Lake-LP", 0, 0 },
{ HDA_INTEL_SRPT, "Intel Sunrise Point", 0, 0 }, { HDA_INTEL_SRPT, "Intel Sunrise Point", 0, 0 },
- { HDA_INTEL_KBLK, "Intel Kabylake", 0, 0 },+ { HDA_INTEL_KBLK, "Intel Kaby Lake", 0, 0 },
- { HDA_INTEL_KBLKH, "Intel Kabylake-H", 0, 0 },+ { HDA_INTEL_KBLKH, "Intel Kaby Lake-H", 0, 0 },
- { HDA_INTEL_CFLK, "Intel Coffelake", 0, 0 },+ { HDA_INTEL_CFLK, "Intel Coffee Lake", 0, 0 },
{ HDA_INTEL_82801F, "Intel 82801F", 0, 0 }, { HDA_INTEL_82801F, "Intel 82801F", 0, 0 },
{ HDA_INTEL_63XXESB, "Intel 631x/632xESB", 0, 0 }, { HDA_INTEL_63XXESB, "Intel 631x/632xESB", 0, 0 },
{ HDA_INTEL_82801G, "Intel 82801G", 0, 0 }, { HDA_INTEL_82801G, "Intel 82801G", 0, 0 },
sys/dev/sound/pci/hda/hdacc.c
@@ -368,7 +368,7 @@ static const struct {
{ HDA_CODEC_INTELHSW, 0, "Intel Haswell" }, { HDA_CODEC_INTELHSW, 0, "Intel Haswell" },
{ HDA_CODEC_INTELBDW, 0, "Intel Broadwell" }, { HDA_CODEC_INTELBDW, 0, "Intel Broadwell" },
{ HDA_CODEC_INTELSKLK, 0, "Intel Skylake" }, { HDA_CODEC_INTELSKLK, 0, "Intel Skylake" },
- { HDA_CODEC_INTELKBLK, 0, "Intel Kabylake" },+ { HDA_CODEC_INTELKBLK, 0, "Intel Kaby Lake" },
{ HDA_CODEC_INTELCL, 0, "Intel Crestline" }, { HDA_CODEC_INTELCL, 0, "Intel Crestline" },
{ HDA_CODEC_SII1390, 0, "Silicon Image SiI1390" }, { HDA_CODEC_SII1390, 0, "Silicon Image SiI1390" },
{ HDA_CODEC_SII1392, 0, "Silicon Image SiI1392" }, { HDA_CODEC_SII1392, 0, "Silicon Image SiI1392" },
sys/fs/fuse/fuse.h
@@ -143,7 +143,7 @@
/* misc */ /* misc */
-SYSCTL_DECL(_vfs_fuse);+SYSCTL_DECL(_vfs_fusefs);
/* Fuse locking */ /* Fuse locking */
sys/fs/fuse/fuse_file.c
@@ -92,7 +92,7 @@ SDT_PROBE_DEFINE2(fuse, , file, trace, "int", "char*");
static int fuse_fh_count = 0; static int fuse_fh_count = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, filehandle_count, CTLFLAG_RD,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, filehandle_count, CTLFLAG_RD,
&fuse_fh_count, 0, "number of open FUSE filehandles"); &fuse_fh_count, 0, "number of open FUSE filehandles");
int int
sys/fs/fuse/fuse_ipc.c
@@ -104,21 +104,21 @@ static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
static fuse_handler_t fuse_standard_handler; static fuse_handler_t fuse_standard_handler;
-SYSCTL_NODE(_vfs, OID_AUTO, fuse, CTLFLAG_RW, 0, "FUSE tunables");+SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables");
-SYSCTL_STRING(_vfs_fuse, OID_AUTO, version, CTLFLAG_RD,+SYSCTL_STRING(_vfs_fusefs, OID_AUTO, version, CTLFLAG_RD,
FUSE_FREEBSD_VERSION, 0, "fuse-freebsd version"); FUSE_FREEBSD_VERSION, 0, "fuse-freebsd version");
static int fuse_ticket_count = 0; static int fuse_ticket_count = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, ticket_count, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, ticket_count, CTLFLAG_RW,
&fuse_ticket_count, 0, "number of allocated tickets"); &fuse_ticket_count, 0, "number of allocated tickets");
static long fuse_iov_permanent_bufsize = 1 << 19; static long fuse_iov_permanent_bufsize = 1 << 19;
-SYSCTL_LONG(_vfs_fuse, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,+SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
&fuse_iov_permanent_bufsize, 0, &fuse_iov_permanent_bufsize, 0,
"limit for permanently stored buffer size for fuse_iovs"); "limit for permanently stored buffer size for fuse_iovs");
static int fuse_iov_credit = 16; static int fuse_iov_credit = 16;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, iov_credit, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
&fuse_iov_credit, 0, &fuse_iov_credit, 0,
"how many times is an oversized fuse_iov tolerated"); "how many times is an oversized fuse_iov tolerated");
sys/fs/fuse/fuse_main.c
@@ -96,9 +96,9 @@ static struct vfsconf fuse_vfsconf = {
.vfc_flags = VFCF_JAIL | VFCF_SYNTHETIC .vfc_flags = VFCF_JAIL | VFCF_SYNTHETIC
}; };
-SYSCTL_INT(_vfs_fuse, OID_AUTO, kernelabi_major, CTLFLAG_RD,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_major, CTLFLAG_RD,
SYSCTL_NULL_INT_PTR, FUSE_KERNEL_VERSION, "FUSE kernel abi major version"); SYSCTL_NULL_INT_PTR, FUSE_KERNEL_VERSION, "FUSE kernel abi major version");
-SYSCTL_INT(_vfs_fuse, OID_AUTO, kernelabi_minor, CTLFLAG_RD,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_minor, CTLFLAG_RD,
SYSCTL_NULL_INT_PTR, FUSE_KERNEL_MINOR_VERSION, "FUSE kernel abi minor version"); SYSCTL_NULL_INT_PTR, FUSE_KERNEL_MINOR_VERSION, "FUSE kernel abi minor version");
SDT_PROVIDER_DEFINE(fuse); SDT_PROVIDER_DEFINE(fuse);
@@ -159,10 +159,10 @@ fuse_loader(struct module *m, int what, void *arg)
/* Registering the module */ /* Registering the module */
static moduledata_t fuse_moddata = { static moduledata_t fuse_moddata = {
- "fuse",+ "fusefs",
fuse_loader, fuse_loader,
&fuse_vfsconf &fuse_vfsconf
}; };
-DECLARE_MODULE(fuse, fuse_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE);+DECLARE_MODULE(fusefs, fuse_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE);
-MODULE_VERSION(fuse, 1);+MODULE_VERSION(fusefs, 1);
sys/fs/fuse/fuse_node.c
@@ -103,47 +103,47 @@ static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS);
static int fuse_node_count = 0; static int fuse_node_count = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, node_count, CTLFLAG_RD,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, node_count, CTLFLAG_RD,
&fuse_node_count, 0, "Count of FUSE vnodes"); &fuse_node_count, 0, "Count of FUSE vnodes");
int fuse_data_cache_mode = FUSE_CACHE_WT; int fuse_data_cache_mode = FUSE_CACHE_WT;
-SYSCTL_PROC(_vfs_fuse, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW,+SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW,
&fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I",
"Zero: disable caching of FUSE file data; One: write-through caching " "Zero: disable caching of FUSE file data; One: write-through caching "
"(default); Two: write-back caching (generally unsafe)"); "(default); Two: write-back caching (generally unsafe)");
int fuse_data_cache_invalidate = 0; int fuse_data_cache_invalidate = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, data_cache_invalidate, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, data_cache_invalidate, CTLFLAG_RW,
&fuse_data_cache_invalidate, 0, &fuse_data_cache_invalidate, 0,
"If non-zero, discard cached clean file data when there are no active file" "If non-zero, discard cached clean file data when there are no active file"
" users"); " users");
int fuse_mmap_enable = 1; int fuse_mmap_enable = 1;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, mmap_enable, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, mmap_enable, CTLFLAG_RW,
&fuse_mmap_enable, 0, &fuse_mmap_enable, 0,
"If non-zero, and data_cache_mode is also non-zero, enable mmap(2) of " "If non-zero, and data_cache_mode is also non-zero, enable mmap(2) of "
"FUSE files"); "FUSE files");
int fuse_refresh_size = 0; int fuse_refresh_size = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, refresh_size, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, refresh_size, CTLFLAG_RW,
&fuse_refresh_size, 0, &fuse_refresh_size, 0,
"If non-zero, and no dirty file extension data is buffered, fetch file " "If non-zero, and no dirty file extension data is buffered, fetch file "
"size before write operations"); "size before write operations");
int fuse_sync_resize = 1; int fuse_sync_resize = 1;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, sync_resize, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, sync_resize, CTLFLAG_RW,
&fuse_sync_resize, 0, &fuse_sync_resize, 0,
"If a cached write extended a file, inform FUSE filesystem of the changed" "If a cached write extended a file, inform FUSE filesystem of the changed"
"size immediately subsequent to the issued writes"); "size immediately subsequent to the issued writes");
int fuse_fix_broken_io = 0; int fuse_fix_broken_io = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, fix_broken_io, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, fix_broken_io, CTLFLAG_RW,
&fuse_fix_broken_io, 0, &fuse_fix_broken_io, 0,
"If non-zero, print a diagnostic warning if a userspace filesystem returns" "If non-zero, print a diagnostic warning if a userspace filesystem returns"
" EIO on reads of recently extended portions of files"); " EIO on reads of recently extended portions of files");
sys/fs/fuse/fuse_vfsops.c
@@ -120,16 +120,16 @@ struct vfsops fuse_vfsops = {
.vfs_statfs = fuse_vfsop_statfs, .vfs_statfs = fuse_vfsop_statfs,
}; };
-SYSCTL_INT(_vfs_fuse, OID_AUTO, init_backgrounded, CTLFLAG_RD,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, init_backgrounded, CTLFLAG_RD,
SYSCTL_NULL_INT_PTR, 1, "indicate async handshake"); SYSCTL_NULL_INT_PTR, 1, "indicate async handshake");
static int fuse_enforce_dev_perms = 0; static int fuse_enforce_dev_perms = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, enforce_dev_perms, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, enforce_dev_perms, CTLFLAG_RW,
&fuse_enforce_dev_perms, 0, &fuse_enforce_dev_perms, 0,
"enforce fuse device permissions for secondary mounts"); "enforce fuse device permissions for secondary mounts");
static unsigned sync_unmount = 1; static unsigned sync_unmount = 1;
-SYSCTL_UINT(_vfs_fuse, OID_AUTO, sync_unmount, CTLFLAG_RW,+SYSCTL_UINT(_vfs_fusefs, OID_AUTO, sync_unmount, CTLFLAG_RW,
&sync_unmount, 0, "specify when to use synchronous unmount"); &sync_unmount, 0, "specify when to use synchronous unmount");
MALLOC_DEFINE(M_FUSEVFS, "fuse_filesystem", "buffer for fuse vfs layer"); MALLOC_DEFINE(M_FUSEVFS, "fuse_filesystem", "buffer for fuse vfs layer");
sys/fs/fuse/fuse_vnops.c
@@ -183,17 +183,17 @@ struct vop_vector fuse_vnops = {
static u_long fuse_lookup_cache_hits = 0; static u_long fuse_lookup_cache_hits = 0;
-SYSCTL_ULONG(_vfs_fuse, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,+SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
&fuse_lookup_cache_hits, 0, "number of positive cache hits in lookup"); &fuse_lookup_cache_hits, 0, "number of positive cache hits in lookup");
static u_long fuse_lookup_cache_misses = 0; static u_long fuse_lookup_cache_misses = 0;
-SYSCTL_ULONG(_vfs_fuse, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,+SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
&fuse_lookup_cache_misses, 0, "number of cache misses in lookup"); &fuse_lookup_cache_misses, 0, "number of cache misses in lookup");
int fuse_lookup_cache_enable = 1; int fuse_lookup_cache_enable = 1;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, lookup_cache_enable, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, lookup_cache_enable, CTLFLAG_RW,
&fuse_lookup_cache_enable, 0, "if non-zero, enable lookup cache"); &fuse_lookup_cache_enable, 0, "if non-zero, enable lookup cache");
/* /*
@@ -202,7 +202,7 @@ SYSCTL_INT(_vfs_fuse, OID_AUTO, lookup_cache_enable, CTLFLAG_RW,
*/ */
static int fuse_reclaim_revoked = 0; static int fuse_reclaim_revoked = 0;
-SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW,+SYSCTL_INT(_vfs_fusefs, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
&fuse_reclaim_revoked, 0, ""); &fuse_reclaim_revoked, 0, "");
uma_zone_t fuse_pbuf_zone; uma_zone_t fuse_pbuf_zone;
sys/modules/Makefile
@@ -129,7 +129,7 @@ SUBDIR= \
filemon \ filemon \
firewire \ firewire \
firmware \ firmware \
- fuse \+ fusefs \
${_fxp} \ ${_fxp} \
gem \ gem \
geom \ geom \
sys/modules/fuse/Makefile → sys/modules/fusefs/Makefile
@@ -2,9 +2,12 @@
.PATH: ${SRCTOP}/sys/fs/fuse .PATH: ${SRCTOP}/sys/fs/fuse
-KMOD= fuse+KMOD= fusefs
SRCS= vnode_if.h \ SRCS= vnode_if.h \
fuse_node.c fuse_io.c fuse_device.c fuse_ipc.c fuse_file.c \ fuse_node.c fuse_io.c fuse_device.c fuse_ipc.c fuse_file.c \
fuse_vfsops.c fuse_vnops.c fuse_internal.c fuse_main.c fuse_vfsops.c fuse_vnops.c fuse_internal.c fuse_main.c
+# Symlink for backwards compatibility with systems installed at 12.0 or older
+LINKS= ${KMODDIR}/${KMOD}.ko ${KMODDIR}/fuse.ko
+
.include <bsd.kmod.mk> .include <bsd.kmod.mk>
sys/modules/ipfw_nat64/Makefile
@@ -8,4 +8,6 @@ SRCS+= nat64clat.c nat64clat_control.c
SRCS+= nat64lsn.c nat64lsn_control.c SRCS+= nat64lsn.c nat64lsn_control.c
SRCS+= nat64stl.c nat64stl_control.c SRCS+= nat64stl.c nat64stl_control.c
+CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include
+
.include <bsd.kmod.mk> .include <bsd.kmod.mk>
sys/net/bpf.c
@@ -2592,16 +2592,16 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{ {
struct bpf_if *bp; struct bpf_if *bp;
- bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);+ KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
- if (bp == NULL)+
- panic("bpfattach");+ bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
+ rw_init(&bp->bif_lock, "bpf interface lock");
LIST_INIT(&bp->bif_dlist); LIST_INIT(&bp->bif_dlist);
LIST_INIT(&bp->bif_wlist); LIST_INIT(&bp->bif_wlist);
bp->bif_ifp = ifp; bp->bif_ifp = ifp;
bp->bif_dlt = dlt; bp->bif_dlt = dlt;
- rw_init(&bp->bif_lock, "bpf interface lock");+ bp->bif_hdrlen = hdrlen;
- KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
bp->bif_bpf = driverp; bp->bif_bpf = driverp;
*driverp = bp; *driverp = bp;
@@ -2609,8 +2609,6 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
BPF_UNLOCK(); BPF_UNLOCK();
- bp->bif_hdrlen = hdrlen;
-
if (bootverbose && IS_DEFAULT_VNET(curvnet)) if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n"); if_printf(ifp, "bpf attached\n");
} }
sys/net/iflib.c
@@ -171,6 +171,7 @@ struct iflib_ctx {
uint32_t ifc_if_flags; uint32_t ifc_if_flags;
uint32_t ifc_flags; uint32_t ifc_flags;
uint32_t ifc_max_fl_buf_size; uint32_t ifc_max_fl_buf_size;
+ uint32_t ifc_rx_mbuf_sz;
int ifc_link_state; int ifc_link_state;
int ifc_link_irq; int ifc_link_irq;
@@ -2172,7 +2173,6 @@ iflib_fl_setup(iflib_fl_t fl)
{ {
iflib_rxq_t rxq = fl->ifl_rxq; iflib_rxq_t rxq = fl->ifl_rxq;
if_ctx_t ctx = rxq->ifr_ctx; if_ctx_t ctx = rxq->ifr_ctx;
- if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
/* /*
@@ -2181,14 +2181,7 @@ iflib_fl_setup(iflib_fl_t fl)
iflib_fl_bufs_free(fl); iflib_fl_bufs_free(fl);
/* Now replenish the mbufs */ /* Now replenish the mbufs */
MPASS(fl->ifl_credits == 0); MPASS(fl->ifl_credits == 0);
- /*+ fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
- * XXX don't set the max_frame_size to larger
- * than the hardware can handle
- */
- if (sctx->isc_max_frame_size <= 2048)
- fl->ifl_buf_size = MCLBYTES;
- else
- fl->ifl_buf_size = MJUMPAGESIZE;
if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
@@ -2313,6 +2306,27 @@ iflib_timer(void *arg)
STATE_UNLOCK(ctx); STATE_UNLOCK(ctx);
} }
+static void
+iflib_calc_rx_mbuf_sz(if_ctx_t ctx)
+{
+ if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
+
+ /*
+ * XXX don't set the max_frame_size to larger
+ * than the hardware can handle
+ */
+ if (sctx->isc_max_frame_size <= MCLBYTES)
+ ctx->ifc_rx_mbuf_sz = MCLBYTES;
+ else
+ ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE;
+}
+
+uint32_t
+iflib_get_rx_mbuf_sz(if_ctx_t ctx)
+{
+ return (ctx->ifc_rx_mbuf_sz);
+}
+
static void static void
iflib_init_locked(if_ctx_t ctx) iflib_init_locked(if_ctx_t ctx)
{ {
@@ -2347,6 +2361,14 @@ iflib_init_locked(if_ctx_t ctx)
CALLOUT_UNLOCK(txq); CALLOUT_UNLOCK(txq);
iflib_netmap_txq_init(ctx, txq); iflib_netmap_txq_init(ctx, txq);
} }
+
+ /*
+ * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so
+ * that drivers can use the value when setting up the hardware receive
+ * buffers.
+ */
+ iflib_calc_rx_mbuf_sz(ctx);
+
#ifdef INVARIANTS #ifdef INVARIANTS
i = if_getdrvflags(ifp); i = if_getdrvflags(ifp);
#endif #endif
@@ -3276,9 +3298,14 @@ defrag:
txq->ift_mbuf_defrag++; txq->ift_mbuf_defrag++;
m_head = m_defrag(*m_headp, M_NOWAIT); m_head = m_defrag(*m_headp, M_NOWAIT);
} }
- remap++;+ /*
- if (__predict_false(m_head == NULL))+ * remap should never be >1 unless bus_dmamap_load_mbuf_sg
+ * failed to map an mbuf that was run through m_defrag
+ */
+ MPASS(remap <= 1);
+ if (__predict_false(m_head == NULL || remap > 1))
goto defrag_failed; goto defrag_failed;
+ remap++;
*m_headp = m_head; *m_headp = m_head;
goto retry; goto retry;
break; break;
@@ -6230,8 +6257,8 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
CTLFLAG_RD, NULL, "IFLIB fields"); CTLFLAG_RD, NULL, "IFLIB fields");
oid_list = SYSCTL_CHILDREN(node); oid_list = SYSCTL_CHILDREN(node);
- SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",+ SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
- CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0,+ CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version,
"driver version"); "driver version");
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
sys/net/iflib.h
@@ -248,7 +248,7 @@ struct if_shared_ctx {
/* fields necessary for probe */ /* fields necessary for probe */
pci_vendor_info_t *isc_vendor_info; pci_vendor_info_t *isc_vendor_info;
- char *isc_driver_version;+ const char *isc_driver_version;
/* optional function to transform the read values to match the table*/ /* optional function to transform the read values to match the table*/
void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
uint16_t *subdevice_id, uint16_t *rev_id); uint16_t *subdevice_id, uint16_t *rev_id);
@@ -381,6 +381,8 @@ void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
void iflib_request_reset(if_ctx_t ctx); void iflib_request_reset(if_ctx_t ctx);
uint8_t iflib_in_detach(if_ctx_t ctx); uint8_t iflib_in_detach(if_ctx_t ctx);
+uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx);
+
/* /*
* If the driver can plug cleanly in to newbus use these * If the driver can plug cleanly in to newbus use these
*/ */
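With ifc_rx_mbuf_sz now computed once in iflib_calc_rx_mbuf_sz() before IFDI_INIT runs, drivers can query the chosen cluster size through the accessor declared above. A hypothetical driver fragment (the mydrv_* names are invented for illustration and assume the usual iflib driver includes):

static void
mydrv_if_init(if_ctx_t ctx)
{
	struct mydrv_softc *sc = iflib_get_softc(ctx);
	uint32_t bufsz = iflib_get_rx_mbuf_sz(ctx);

	/*
	 * Program the hardware receive buffer length from the cluster
	 * size iflib selected (MCLBYTES or MJUMPAGESIZE).
	 */
	mydrv_write_rx_buflen(sc, bufsz);
}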
sys/netinet6/ip_fw_nat64.h
@@ -122,7 +122,7 @@ typedef struct _ipfw_nat64clat_cfg {
/* /*
* NAT64LSN default configuration values * NAT64LSN default configuration values
*/ */
-#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */+#define NAT64LSN_MAX_PORTS 2048 /* Unused */
#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */ #define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */ #define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */ #define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
@@ -135,16 +135,20 @@ typedef struct _ipfw_nat64clat_cfg {
typedef struct _ipfw_nat64lsn_cfg { typedef struct _ipfw_nat64lsn_cfg {
char name[64]; /* NAT name */ char name[64]; /* NAT name */
uint32_t flags; uint32_t flags;
- uint32_t max_ports; /* Max ports per client */+
- uint32_t agg_prefix_len; /* Prefix length to count */+ uint32_t max_ports; /* Unused */
- uint32_t agg_prefix_max; /* Max hosts per agg prefix */+ uint32_t agg_prefix_len; /* Unused */
+ uint32_t agg_prefix_max; /* Unused */
+
struct in_addr prefix4; struct in_addr prefix4;
uint16_t plen4; /* Prefix length */ uint16_t plen4; /* Prefix length */
uint16_t plen6; /* Prefix length */ uint16_t plen6; /* Prefix length */
struct in6_addr prefix6; /* NAT64 prefix */ struct in6_addr prefix6; /* NAT64 prefix */
uint32_t jmaxlen; /* Max jobqueue length */ uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_port; /* Min port group # to use */+
- uint16_t max_port; /* Max port group # to use */+ uint16_t min_port; /* Unused */
+ uint16_t max_port; /* Unused */
+
uint16_t nh_delete_delay;/* Stale host delete delay */ uint16_t nh_delete_delay;/* Stale host delete delay */
uint16_t pg_delete_delay;/* Stale portgroup delete delay */ uint16_t pg_delete_delay;/* Stale portgroup delete delay */
uint16_t st_syn_ttl; /* TCP syn expire */ uint16_t st_syn_ttl; /* TCP syn expire */
@@ -153,7 +157,7 @@ typedef struct _ipfw_nat64lsn_cfg {
uint16_t st_udp_ttl; /* UDP expire */ uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */ uint16_t st_icmp_ttl; /* ICMP expire */
uint8_t set; /* Named instance set [0..31] */ uint8_t set; /* Named instance set [0..31] */
- uint8_t spare;+ uint8_t states_chunks; /* Number of states chunks per PG */
} ipfw_nat64lsn_cfg; } ipfw_nat64lsn_cfg;
typedef struct _ipfw_nat64lsn_state { typedef struct _ipfw_nat64lsn_state {
@@ -177,5 +181,30 @@ typedef struct _ipfw_nat64lsn_stg {
uint32_t spare2; uint32_t spare2;
} ipfw_nat64lsn_stg; } ipfw_nat64lsn_stg;
-#endif /* _NETINET6_IP_FW_NAT64_H_ */+typedef struct _ipfw_nat64lsn_state_v1 {
+ struct in6_addr host6; /* Bound IPv6 host */
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint16_t spare;
+ uint16_t idle; /* Last used time */
+ uint8_t flags; /* State flags */
+ uint8_t proto; /* protocol */
+} ipfw_nat64lsn_state_v1;
+typedef struct _ipfw_nat64lsn_stg_v1 {
+ union nat64lsn_pgidx {
+ uint64_t index;
+ struct {
+ uint8_t chunk; /* states chunk */
+ uint8_t proto; /* protocol */
+ uint16_t port; /* base port */
+ in_addr_t addr; /* alias address */
+ };
+ } next; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint32_t count; /* Number of states */
+} ipfw_nat64lsn_stg_v1;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
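The new _v1 listing structures carry a packed 64-bit cursor: next.index combines states chunk, protocol, base port and alias address, so a resumable states listing only has to hand one opaque value back to the kernel. A minimal userland-style sketch of reading it back (assumes the relevant ipfw headers are already included; print_resume_point is not part of the commit):

#include <stdio.h>
#include <arpa/inet.h>

static void
print_resume_point(const ipfw_nat64lsn_stg_v1 *stg)
{
	struct in_addr a;

	a.s_addr = stg->next.addr;
	printf("resume at alias %s proto %u base port %u chunk %u\n",
	    inet_ntoa(a), (unsigned)stg->next.proto,
	    (unsigned)stg->next.port, (unsigned)stg->next.chunk);
}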
sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -33,16 +33,17 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
#include <sys/counter.h> #include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h> #include <sys/errno.h>
+#include <sys/hash.h>
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/lock.h> #include <sys/lock.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/mbuf.h> #include <sys/mbuf.h>
#include <sys/module.h> #include <sys/module.h>
#include <sys/rmlock.h> #include <sys/rmlock.h>
-#include <sys/rwlock.h>
#include <sys/socket.h> #include <sys/socket.h>
-#include <sys/queue.h>
#include <sys/syslog.h> #include <sys/syslog.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
@@ -71,17 +72,20 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
-static void nat64lsn_periodic(void *data);+#define NAT64LSN_EPOCH_ENTER(et) NET_EPOCH_ENTER(et)
-#define PERIODIC_DELAY 4+#define NAT64LSN_EPOCH_EXIT(et) NET_EPOCH_EXIT(et)
-static uint8_t nat64lsn_proto_map[256];+#define NAT64LSN_EPOCH_ASSERT() NET_EPOCH_ASSERT()
-uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];+#define NAT64LSN_EPOCH_CALL(c, f) epoch_call(net_epoch_preempt, (c), (f))
-#define NAT64_FLAG_FIN 0x01 /* FIN was seen */+static uma_zone_t nat64lsn_host_zone;
-#define NAT64_FLAG_SYN 0x02 /* First syn in->out */+static uma_zone_t nat64lsn_pgchunk_zone;
-#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */+static uma_zone_t nat64lsn_pg_zone;
-#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)+static uma_zone_t nat64lsn_aliaslink_zone;
+static uma_zone_t nat64lsn_state_zone;
+static uma_zone_t nat64lsn_job_zone;
-#define NAT64_FLAG_RDR 0x80 /* Port redirect */+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4
#define NAT64_LOOKUP(chain, cmd) \ #define NAT64_LOOKUP(chain, cmd) \
(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/* /*
@@ -91,25 +95,33 @@ uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
enum nat64lsn_jtype { enum nat64lsn_jtype {
JTYPE_NEWHOST = 1, JTYPE_NEWHOST = 1,
JTYPE_NEWPORTGROUP, JTYPE_NEWPORTGROUP,
- JTYPE_DELPORTGROUP,+ JTYPE_DESTROY,
}; };
struct nat64lsn_job_item { struct nat64lsn_job_item {
- TAILQ_ENTRY(nat64lsn_job_item) next;+ STAILQ_ENTRY(nat64lsn_job_item) entries;
enum nat64lsn_jtype jtype; enum nat64lsn_jtype jtype;
- struct nat64lsn_host *nh;+
- struct nat64lsn_portgroup *pg;+ union {
- void *spare_idx;+ struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
- struct in6_addr haddr;+ struct mbuf *m;
- uint8_t nat_proto;+ struct nat64lsn_host *host;
- uint8_t done;+ struct nat64lsn_state *state;
- int needs_idx;+ uint32_t src6_hval;
- int delcount;+ uint32_t state_hval;
- unsigned int fhash; /* Flow hash */+ struct ipfw_flow_id f_id;
- uint32_t aaddr; /* Last used address (net) */+ in_addr_t faddr;
- struct mbuf *m;+ uint16_t port;
- struct ipfw_flow_id f_id;+ uint8_t proto;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];+ uint8_t done;
+ };
+ struct { /* used by JTYPE_DESTROY */
+ struct nat64lsn_hosts_slist hosts;
+ struct nat64lsn_pg_slist portgroups;
+ struct nat64lsn_pgchunk *pgchunk;
+ struct epoch_context epoch_ctx;
+ };
+ };
}; };
static struct mtx jmtx; static struct mtx jmtx;
@@ -118,143 +130,311 @@ static struct mtx jmtx;
#define JQUEUE_LOCK() mtx_lock(&jmtx) #define JQUEUE_LOCK() mtx_lock(&jmtx)
#define JQUEUE_UNLOCK() mtx_unlock(&jmtx) #define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static struct nat64lsn_job_item *nat64lsn_create_job(
+ struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji); struct nat64lsn_job_item *ji);
-static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,+static void nat64lsn_job_destroy(epoch_context_t ctx);
- struct nat64lsn_job_head *jhead, int jlen);+static void nat64lsn_destroy_host(struct nat64lsn_host *host);
-+static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
-static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,+
- const struct ipfw_flow_id *f_id, int jtype);
-static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
- int needs_idx);
-static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg, static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);+ const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg, static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
- struct ipfw_flow_id *f_id, struct mbuf **pm);+ struct ipfw_flow_id *f_id, struct mbuf **mp);
-+static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
-static int alloc_portgroup(struct nat64lsn_job_item *ji);+ struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
-static void destroy_portgroup(struct nat64lsn_portgroup *pg);+
-static void destroy_host6(struct nat64lsn_host *nh);+#define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
-static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);+#define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
+#define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
+#define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
+#define NAT64_BIT_STALE 7 /* state is going to be expired */
+
+#define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
+#define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
+#define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
-static int attach_portgroup(struct nat64lsn_cfg *cfg,+#define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
- struct nat64lsn_job_item *ji);+#define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
-static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+ uint8_t result;
-/* XXX tmp */+ result = flags & (TH_FIN|TH_SYN);
-static uma_zone_t nat64lsn_host_zone;+ result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
-static uma_zone_t nat64lsn_pg_zone;+ result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
-static uma_zone_t nat64lsn_pgidx_zone;
-static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,+ return (result);
- struct nat64lsn_host *nh);+}
-#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))+static void
-#define I6_first(_ph, h) (_ph)[h]+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
-#define I6_next(x) (x)->next+ struct nat64lsn_state *state)
-#define I6_val(x) (&(x)->addr)+{
-#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
-#define I6_lock(a, b)
-#define I6_unlock(a, b)
-#define I6HASH_FIND(_cfg, _res, _a) \+ memset(plog, 0, sizeof(*plog));
- CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)+ plog->length = PFLOG_REAL_HDRLEN;
-#define I6HASH_INSERT(_cfg, _i) \+ plog->af = family;
- CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)+ plog->action = PF_NAT;
-#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \+ plog->dir = PF_IN;
- CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)+ plog->rulenr = htonl(state->ip_src);
+ plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
+ (state->proto << 8) | (state->ip_dst & 0xff));
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
-#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \+#define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
- CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)+#define HOST_HVAL(c, a) HVAL((a),\
+ sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
+#define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
+
+#define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
+ sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
+#define ALIAS_BYHASH(c, v) \
+ ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
+static struct nat64lsn_aliaslink*
+nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
+ struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
+{
-#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)+ /*
+	 * We could implement different algorithms for
+	 * selecting an alias address.
+ * XXX: for now we use first available.
+ */
+ return (CK_SLIST_FIRST(&host->aliases));
+}
-static unsigned+#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
-djb_hash(const unsigned char *h, const int len)+#define STATE_HASH(h, v) \
+ ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
+#define STATES_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? (p)->states : \
+ ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
+
+#ifdef __LP64__
+#define FREEMASK_FFSLL(pg, faddr) \
+ ffsll(*FREEMASK_CHUNK((pg), (faddr)))
+#define FREEMASK_BTR(pg, faddr, bit) \
+ ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
+#else
+static inline int
+freemask_ffsll(uint32_t *freemask)
{ {
- unsigned int result = 0;
int i; int i;
- for (i = 0; i < len; i++)+ if ((i = ffsl(freemask[0])) != 0)
- result = 33 * result ^ h[i];+ return (i);
-+ if ((i = ffsl(freemask[1])) != 0)
- return (result);+ return (i + 32);
+ return (0);
} }
-+#define FREEMASK_FFSLL(pg, faddr) \
-/*+ freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
-static size_t +#define FREEMASK_BTR(pg, faddr, bit) \
-bitmask_size(size_t num, int *level)+ ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
+ ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif /* !__LP64__ */
+
+
+#define NAT64LSN_TRY_PGCNT 32
+static struct nat64lsn_pg*
+nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
+ struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
+ uint32_t *pgidx, in_addr_t faddr)
{ {
- size_t x;+ struct nat64lsn_pg *pg, *oldpg;
- int c;+ uint32_t idx, oldidx;
+ int cnt;
+
+ cnt = 0;
+ /* First try last used PG */
+ oldpg = pg = ck_pr_load_ptr(pgptr);
+ idx = oldidx = ck_pr_load_32(pgidx);
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ do {
+ ck_pr_fence_load();
+ if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
+ /*
+			 * If the last used PG has no free states,
+ * try to update pointer.
+ * NOTE: it can be already updated by jobs handler,
+ * thus we use CAS operation.
+ */
+ if (cnt > 0)
+ ck_pr_cas_ptr(pgptr, oldpg, pg);
+ return (pg);
+ }
+ /* Stop if idx is out of range */
+ if (!ISSET32(*chunkmask, idx / 32))
+ break;
- for (c = 0, x = num; num > 1; num /= 64, c++)+ if (ISSET32(pgmask[idx / 32], idx % 32))
- ;+ pg = ck_pr_load_ptr(
+ &chunks[idx / 32]->pgptr[idx % 32]);
+ else
+ pg = NULL;
- return (x);+ idx++;
+ } while (++cnt < NAT64LSN_TRY_PGCNT);
+
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ ck_pr_cas_32(pgidx, oldidx, idx);
+ return (NULL);
} }
-static void+static struct nat64lsn_state*
-bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)+nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
+ const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
+ uint16_t port, uint8_t proto)
{ {
- size_t x, z;+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int i, offset;
+
+ NAT64LSN_EPOCH_ASSERT();
+
+ /* Check that we already have state for given arguments */
+ CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
+ if (state->proto == proto && state->ip_dst == faddr &&
+ state->sport == port && state->dport == f_id->dst_port)
+ return (state);
+ }
- memset(pmask, 0xFF, bufsize);+ link = nat64lsn_get_aliaslink(cfg, host, f_id);
- for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)+ if (link == NULL)
- ;+ return (NULL);
- pmask[x] ~= 0x01;
-}
-*/
-static void+ switch (proto) {
-nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,+ case IPPROTO_TCP:
- uint32_t n, uint32_t sn)+ pg = nat64lsn_get_pg(
-{+ &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
+ link->alias->tcp, &link->alias->tcp_pg,
+ &link->alias->tcp_pgidx, faddr);
+ break;
+ case IPPROTO_UDP:
+ pg = nat64lsn_get_pg(
+ &link->alias->udp_chunkmask, link->alias->udp_pgmask,
+ link->alias->udp, &link->alias->udp_pg,
+ &link->alias->udp_pgidx, faddr);
+ break;
+ case IPPROTO_ICMP:
+ pg = nat64lsn_get_pg(
+ &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
+ link->alias->icmp, &link->alias->icmp_pg,
+ &link->alias->icmp_pgidx, faddr);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
- memset(plog, 0, sizeof(*plog));+ /* Check that PG has some free states */
- plog->length = PFLOG_REAL_HDRLEN;+ state = NULL;
- plog->af = family;+ i = FREEMASK_BITCOUNT(pg, faddr);
- plog->action = PF_NAT;+ while (i-- > 0) {
- plog->dir = PF_IN;+ offset = FREEMASK_FFSLL(pg, faddr);
- plog->rulenr = htonl(n);+ if (offset == 0) {
- plog->subrulenr = htonl(sn);+ /*
- plog->ruleset[0] = '\0';+ * We lost the race.
- strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));+ * No more free states in this PG.
- ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);+ */
+ break;
+ }
+
+ /* Lets try to atomically grab the state */
+ if (FREEMASK_BTR(pg, faddr, offset - 1)) {
+ state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
+ /* Initialize */
+ state->flags = proto != IPPROTO_TCP ? 0 :
+ convert_tcp_flags(f_id->_flags);
+ state->proto = proto;
+ state->aport = pg->base_port + offset - 1;
+ state->dport = f_id->dst_port;
+ state->sport = port;
+ state->ip6_dst = f_id->dst_ip6;
+ state->ip_dst = faddr;
+ state->ip_src = link->alias->addr;
+ state->hval = hval;
+ state->host = host;
+ SET_AGE(state->timestamp);
+
+ /* Insert new state into host's hash table */
+ HOST_LOCK(host);
+ CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
+ state, entries);
+ host->states_count++;
+ /*
+ * XXX: In case if host is going to be expired,
+ * reset NAT64LSN_DEADHOST flag.
+ */
+ host->flags &= ~NAT64LSN_DEADHOST;
+ HOST_UNLOCK(host);
+ NAT64STAT_INC(&cfg->base.stats, screated);
+ /* Mark the state as ready for translate4 */
+ ck_pr_fence_store();
+ ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
+ break;
+ }
+ }
+ return (state);
} }
+
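The state allocation in nat64lsn_get_state6to4() above claims a port without a lock: it scans the per-destination free mask with FREEMASK_FFSLL() and then tries to clear that bit atomically, retrying when another CPU wins the race. A stand-alone sketch of the same idiom over a plain 64-bit mask, mirroring the __LP64__ variant (claim_slot is illustrative, not part of the commit):

#include <strings.h>
#include <stdint.h>
#include <ck_pr.h>

static int
claim_slot(uint64_t *freemask)
{
	int bit;

	/* ffsll() returns the 1-based index of the lowest set bit, 0 if none. */
	while ((bit = ffsll((long long)ck_pr_load_64(freemask))) != 0) {
		/* btr returns true only if we were the one to clear the bit. */
		if (ck_pr_btr_64(freemask, bit - 1))
			return (bit - 1);	/* slot claimed */
	}
	return (-1);				/* mask exhausted */
}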
/* /*
* Inspects icmp packets to see if the message contains different * Inspects icmp packets to see if the message contains different
* packet header so we need to alter @addr and @port. * packet header so we need to alter @addr and @port.
*/ */
static int static int
-inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,+inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
uint16_t *port) uint16_t *port)
{ {
+ struct icmp *icmp;
struct ip *ip; struct ip *ip;
- struct tcphdr *tcp;
- struct udphdr *udp;
- struct icmphdr *icmp;
int off; int off;
- uint8_t proto;+ uint8_t inner_proto;
- ip = mtod(*m, struct ip *); /* Outer IP header */+ ip = mtod(*mp, struct ip *); /* Outer IP header */
off = (ip->ip_hl << 2) + ICMP_MINLEN; off = (ip->ip_hl << 2) + ICMP_MINLEN;
- if ((*m)->m_len < off)+ if ((*mp)->m_len < off)
- *m = m_pullup(*m, off);+ *mp = m_pullup(*mp, off);
- if (*m == NULL)+ if (*mp == NULL)
return (ENOMEM); return (ENOMEM);
- ip = mtod(*m, struct ip *); /* Outer IP header */+ ip = mtod(*mp, struct ip *); /* Outer IP header */
- icmp = L3HDR(ip, struct icmphdr *);+ icmp = L3HDR(ip, struct icmp *);
switch (icmp->icmp_type) { switch (icmp->icmp_type) {
case ICMP_ECHO: case ICMP_ECHO:
case ICMP_ECHOREPLY: case ICMP_ECHOREPLY:
/* Use icmp ID as distinguisher */ /* Use icmp ID as distinguisher */
- *port = ntohs(*((uint16_t *)(icmp + 1)));+ *port = ntohs(icmp->icmp_id);
return (0); return (0);
case ICMP_UNREACH: case ICMP_UNREACH:
case ICMP_TIMXCEED: case ICMP_TIMXCEED:
@@ -266,90 +446,133 @@ inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
* ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
* of ULP header. * of ULP header.
*/ */
- if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)+ if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
return (EINVAL); return (EINVAL);
- if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)+ if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
- *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);+ *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
- if (*m == NULL)+ if (*mp == NULL)
return (ENOMEM); return (ENOMEM);
- ip = mtodo(*m, off); /* Inner IP header */+ ip = mtodo(*mp, off); /* Inner IP header */
- proto = ip->ip_p;+ inner_proto = ip->ip_p;
off += ip->ip_hl << 2; /* Skip inner IP header */ off += ip->ip_hl << 2; /* Skip inner IP header */
*addr = ntohl(ip->ip_src.s_addr); *addr = ntohl(ip->ip_src.s_addr);
- if ((*m)->m_len < off + ICMP_MINLEN)+ if ((*mp)->m_len < off + ICMP_MINLEN)
- *m = m_pullup(*m, off + ICMP_MINLEN);+ *mp = m_pullup(*mp, off + ICMP_MINLEN);
- if (*m == NULL)+ if (*mp == NULL)
return (ENOMEM); return (ENOMEM);
- switch (proto) {+ switch (inner_proto) {
case IPPROTO_TCP: case IPPROTO_TCP:
- tcp = mtodo(*m, off);
- *nat_proto = NAT_PROTO_TCP;
- *port = ntohs(tcp->th_sport);
- return (0);
case IPPROTO_UDP: case IPPROTO_UDP:
- udp = mtodo(*m, off);+ /* Copy source port from the header */
- *nat_proto = NAT_PROTO_UDP;+ *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
- *port = ntohs(udp->uh_sport);+ *proto = inner_proto;
return (0); return (0);
case IPPROTO_ICMP: case IPPROTO_ICMP:
/* /*
* We will translate only ICMP errors for our ICMP * We will translate only ICMP errors for our ICMP
* echo requests. * echo requests.
*/ */
- icmp = mtodo(*m, off);+ icmp = mtodo(*mp, off);
if (icmp->icmp_type != ICMP_ECHO) if (icmp->icmp_type != ICMP_ECHO)
return (EOPNOTSUPP); return (EOPNOTSUPP);
- *port = ntohs(*((uint16_t *)(icmp + 1)));+ *port = ntohs(icmp->icmp_id);
return (0); return (0);
}; };
return (EOPNOTSUPP); return (EOPNOTSUPP);
} }
-static inline uint8_t+static struct nat64lsn_state*
-convert_tcp_flags(uint8_t flags)+nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{ {
- uint8_t result;+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int chunk_idx, pg_idx, state_idx;
- result = flags & (TH_FIN|TH_SYN);+ NAT64LSN_EPOCH_ASSERT();
- result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
- result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
- return (result);+ if (port < NAT64_MIN_PORT)
+ return (NULL);
+ /*
+ * Alias keeps 32 pgchunks for each protocol.
+ * Each pgchunk has 32 pointers to portgroup.
+ * Each portgroup has 64 states for ports.
+ */
+ port -= NAT64_MIN_PORT;
+ chunk_idx = port / 2048;
+
+ port -= chunk_idx * 2048;
+ pg_idx = port / 64;
+ state_idx = port % 64;
+
+ /*
+ * First check in proto_chunkmask that we have allocated PG chunk.
+ * Then check in proto_pgmask that we have valid PG pointer.
+ */
+ pg = NULL;
+ switch (proto) {
+ case IPPROTO_TCP:
+ if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
+ ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
+ ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->udp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_ICMP:
+ if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
+ ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
+
+ if (FREEMASK_ISSET(pg, faddr, state_idx))
+ return (NULL);
+
+ state = &STATES_CHUNK(pg, faddr)->state[state_idx];
+ ck_pr_fence_load();
+ if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
+ return (state);
+ return (NULL);
} }
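The port-to-index arithmetic in nat64lsn_get_state4to6() above follows directly from the layout described in its comment: per protocol, 32 pgchunks of 32 portgroups of 64 states each. A worked example, assuming NAT64_MIN_PORT is 1024 as in the stock nat64 headers:

#include <stdio.h>
#include <stdint.h>

static void
example_port_split(void)
{
	uint16_t port = 4321 - 1024;	/* 3297 ports above the minimum */
	int chunk_idx = port / 2048;	/* 1  -> second pgchunk */

	port -= chunk_idx * 2048;	/* 1249 ports into that chunk */
	printf("chunk %d pg %d state %d\n",
	    chunk_idx, port / 64, port % 64);
	/* prints: chunk 1 pg 19 state 33 */
}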
-static NAT64NOINLINE int+static int
-nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,+nat64lsn_translate4(struct nat64lsn_cfg *cfg,
- struct mbuf **pm)+ const struct ipfw_flow_id *f_id, struct mbuf **mp)
{ {
struct pfloghdr loghdr, *logdata; struct pfloghdr loghdr, *logdata;
struct in6_addr src6; struct in6_addr src6;
- struct nat64lsn_portgroup *pg;+ struct nat64lsn_state *state;
- struct nat64lsn_host *nh;+ struct nat64lsn_alias *alias;
- struct nat64lsn_state *st;+ uint32_t addr, flags;
- struct ip *ip;+ uint16_t port, ts;
- uint32_t addr;
- uint16_t state_flags, state_ts;
- uint16_t port, lport;
- uint8_t nat_proto;
int ret; int ret;
+ uint8_t proto;
addr = f_id->dst_ip; addr = f_id->dst_ip;
port = f_id->dst_port; port = f_id->dst_port;
+ proto = f_id->proto;
if (addr < cfg->prefix4 || addr > cfg->pmask4) { if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4); NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict); return (cfg->nomatch_verdict);
} }
- /* Check if protocol is supported and get its short id */+ /* Check if protocol is supported */
- nat_proto = nat64lsn_proto_map[f_id->proto];+ switch (proto) {
- if (nat_proto == 0) {+ case IPPROTO_ICMP:
- NAT64STAT_INC(&cfg->base.stats, noproto);+ ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
- return (cfg->nomatch_verdict);
- }
-
- /* We might need to handle icmp differently */
- if (nat_proto == NAT_PROTO_ICMP) {
- ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
if (ret != 0) { if (ret != 0) {
if (ret == ENOMEM) { if (ret == ENOMEM) {
NAT64STAT_INC(&cfg->base.stats, nomem); NAT64STAT_INC(&cfg->base.stats, nomem);
@@ -358,804 +581,640 @@ nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
NAT64STAT_INC(&cfg->base.stats, noproto); NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict); return (cfg->nomatch_verdict);
} }
- /* XXX: Check addr for validity */
if (addr < cfg->prefix4 || addr > cfg->pmask4) { if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4); NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict); return (cfg->nomatch_verdict);
} }
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ NAT64STAT_INC(&cfg->base.stats, noproto);
+ return (cfg->nomatch_verdict);
} }
- /* Calc portgroup offset w.r.t protocol */+ alias = &ALIAS_BYHASH(cfg, addr);
- pg = GET_PORTGROUP(cfg, addr, nat_proto, port);+ MPASS(addr == alias->addr);
- /* Check if this port is occupied by any portgroup */+ /* Check that we have state for this port */
- if (pg == NULL) {+ state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
+ port, proto);
+ if (state == NULL) {
NAT64STAT_INC(&cfg->base.stats, nomatch4); NAT64STAT_INC(&cfg->base.stats, nomatch4);
-#if 0
- DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
- _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
-#endif
return (cfg->nomatch_verdict); return (cfg->nomatch_verdict);
} }
/* TODO: Check flags to see if we need to do some static mapping */ /* TODO: Check flags to see if we need to do some static mapping */
- nh = pg->host;
-
- /* Prepare some fields we might need to update */
- SET_AGE(state_ts);
- ip = mtod(*pm, struct ip *);
- if (ip->ip_p == IPPROTO_TCP)
- state_flags = convert_tcp_flags(
- L3HDR(ip, struct tcphdr *)->th_flags);
- else
- state_flags = 0;
-
- /* Lock host and get port mapping */
- NAT64_LOCK(nh);
- st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];+ /* Update some state fields if need */
- if (st->timestamp != state_ts)+ SET_AGE(ts);
- st->timestamp = state_ts;+ if (f_id->proto == IPPROTO_TCP)
- if ((st->flags & state_flags) != state_flags)+ flags = convert_tcp_flags(f_id->_flags);
- st->flags |= state_flags;+ else
- lport = htons(st->u.s.lport);+ flags = 0;
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != flags)
+ state->flags |= flags;
- NAT64_UNLOCK(nh);+ port = htons(state->sport);
+ src6 = state->ip6_dst;
if (cfg->base.flags & NAT64_LOG) { if (cfg->base.flags & NAT64_LOG) {
logdata = &loghdr; logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);+ nat64lsn_log(logdata, *mp, AF_INET, state);
} else } else
logdata = NULL; logdata = NULL;
+ /*
+ * We already have src6 with embedded address, but it is possible,
+ * that src_ip is different than state->ip_dst, this is why we
+ * do embedding again.
+ */
nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip)); nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
- ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,+ ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
&cfg->base, logdata); &cfg->base, logdata);
-
if (ret == NAT64SKIP) if (ret == NAT64SKIP)
return (cfg->nomatch_verdict); return (cfg->nomatch_verdict);
- if (ret == NAT64MFREE)+ if (ret == NAT64RETURN)
- m_freem(*pm);+ *mp = NULL;
- *pm = NULL;
-
return (IP_FW_DENY); return (IP_FW_DENY);
} }
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off)
-{
- char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
-
- if ((V_nat64_debug & DP_STATE) == 0)
- return;
- inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
- inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
- inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
-
- DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
- "%s:%d AGE %d", px, pg->idx, st, off,
- s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
- d, st->u.s.fport, GET_AGE(st->timestamp));
-}
-
/* /*
- * Check if particular TCP state is stale and should be deleted.+ * Check if particular state is stale and should be deleted.
* Return 1 if true, 0 otherwise. * Return 1 if true, 0 otherwise.
*/ */
static int static int
-nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,+nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
- const struct nat64lsn_state *st, int age)
{ {
- int ttl;+ int age, ttl;
-
- if (st->flags & NAT64_FLAG_FIN)
- ttl = cfg->st_close_ttl;
- else if (st->flags & NAT64_FLAG_ESTAB)
- ttl = cfg->st_estab_ttl;
- else if (st->flags & NAT64_FLAG_SYN)
- ttl = cfg->st_syn_ttl;
- else
- ttl = cfg->st_syn_ttl;
- if (age > ttl)+ /* State was marked as stale in previous pass. */
+ if (ISSET32(state->flags, NAT64_BIT_STALE))
return (1); return (1);
- return (0);
-}
-
-/*
- * Check if nat state @st is stale and should be deleted.
- * Return 1 if true, 0 otherwise.
- */
-static NAT64NOINLINE int
-nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
-{
- int age, delete;
- age = GET_AGE(st->timestamp);+ /* State is not yet initialized, it is going to be READY */
- delete = 0;+ if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
-
- /* Skip immutable records */
- if (st->flags & NAT64_FLAG_RDR)
return (0); return (0);
- switch (pg->nat_proto) {+ age = GET_AGE(state->timestamp);
- case NAT_PROTO_TCP:+ switch (state->proto) {
- delete = nat64lsn_periodic_check_tcp(cfg, st, age);+ case IPPROTO_TCP:
- break;+ if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
- case NAT_PROTO_UDP:+ ttl = cfg->st_close_ttl;
- if (age > cfg->st_udp_ttl)+ else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
- delete = 1;+ ttl = cfg->st_estab_ttl;
- break;+ else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
- case NAT_PROTO_ICMP:+ ttl = cfg->st_syn_ttl;
- if (age > cfg->st_icmp_ttl)+ else
- delete = 1;+ ttl = cfg->st_syn_ttl;
- break;+ if (age > ttl)
+ return (1);
+ break;
+ case IPPROTO_UDP:
+ if (age > cfg->st_udp_ttl)
+ return (1);
+ break;
+ case IPPROTO_ICMP:
+ if (age > cfg->st_icmp_ttl)
+ return (1);
+ break;
} }
-+ return (0);
- return (delete);
} }
-+static int
-/*+nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
- * The following structures and functions
- * are used to perform SLIST_FOREACH_SAFE()
- * analog for states identified by struct st_ptr.
- */
-
-struct st_idx {
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
- struct st_ptr sidx_next;
-};
-
-static struct st_idx *
-st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
- struct st_ptr *sidx, struct st_idx *si)
{ {
- struct nat64lsn_portgroup *pg;+ struct nat64lsn_state *state;
- struct nat64lsn_state *st;+ struct nat64lsn_host *host;
-+ uint64_t freemask;
- if (sidx->idx == 0) {+ int c, i, update_age;
- memset(si, 0, sizeof(*si));+
- return (si);+ update_age = 0;
+ for (c = 0; c < pg->chunks_count; c++) {
+ FREEMASK_COPY(pg, c, freemask);
+ for (i = 0; i < 64; i++) {
+ if (ISSET64(freemask, i))
+ continue;
+ state = &STATES_CHUNK(pg, c)->state[i];
+ if (nat64lsn_check_state(cfg, state) == 0) {
+ update_age = 1;
+ continue;
+ }
+ /*
+ * Expire state:
+ * 1. Mark as STALE and unlink from host's hash.
+ * 2. Set bit in freemask.
+ */
+ if (ISSET32(state->flags, NAT64_BIT_STALE)) {
+ /*
+ * State was marked as STALE in previous
+ * pass. Now it is safe to release it.
+ */
+ state->flags = 0;
+ ck_pr_fence_store();
+ FREEMASK_BTS(pg, c, i);
+ NAT64STAT_INC(&cfg->base.stats, sdeleted);
+ continue;
+ }
+ MPASS(state->flags & NAT64_FLAG_READY);
+
+ host = state->host;
+ HOST_LOCK(host);
+ CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
+ state, nat64lsn_state, entries);
+ host->states_count--;
+ HOST_UNLOCK(host);
+
+ /* Reset READY flag */
+ ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
+ /* And set STALE flag */
+ ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
+ ck_pr_fence_store();
+ /*
+ * Now translate6 will not use this state, wait
+ * until it become safe for translate4, then mark
+ * state as free.
+ */
+ }
} }
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);+ /*
- st = &pg->states[sidx->off];+ * We have some alive states, update timestamp.
+ */
+ if (update_age)
+ SET_AGE(pg->timestamp);
- si->pg = pg;+ if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
- si->st = st;+ return (0);
- si->sidx_next = st->next;
- return (si);+ return (1);
} }
-static struct st_idx *+static void
-st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,+nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
- struct st_idx *si)+ struct nat64lsn_pg_slist *portgroups)
{ {
- struct st_ptr sidx;+ struct nat64lsn_alias *alias;
- struct nat64lsn_portgroup *pg;+ struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
- struct nat64lsn_state *st;+ uint32_t *pgmask, *pgidx;
-+ int i, idx;
- sidx = si->sidx_next;+
- if (sidx.idx == 0) {+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
- memset(si, 0, sizeof(*si));+ alias = &cfg->aliases[i];
- si->st = NULL;+ CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
- si->pg = NULL;+ if (nat64lsn_maintain_pg(cfg, pg) == 0)
- return (si);+ continue;
+ /* Always keep first PG */
+ if (pg->base_port == NAT64_MIN_PORT)
+ continue;
+ /*
+ * PG is expired, unlink it and schedule for
+ * deferred destroying.
+ */
+ idx = (pg->base_port - NAT64_MIN_PORT) / 64;
+ switch (pg->proto) {
+ case IPPROTO_TCP:
+ pgmask = alias->tcp_pgmask;
+ pgptr = &alias->tcp_pg;
+ pgidx = &alias->tcp_pgidx;
+ firstpg = alias->tcp[0]->pgptr[0];
+ break;
+ case IPPROTO_UDP:
+ pgmask = alias->udp_pgmask;
+ pgptr = &alias->udp_pg;
+ pgidx = &alias->udp_pgidx;
+ firstpg = alias->udp[0]->pgptr[0];
+ break;
+ case IPPROTO_ICMP:
+ pgmask = alias->icmp_pgmask;
+ pgptr = &alias->icmp_pg;
+ pgidx = &alias->icmp_pgidx;
+ firstpg = alias->icmp[0]->pgptr[0];
+ break;
+ }
+ /* Reset the corresponding bit in pgmask array. */
+ ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
+ ck_pr_fence_store();
+ /* If last used PG points to this PG, reset it. */
+ ck_pr_cas_ptr(pgptr, pg, firstpg);
+ ck_pr_cas_32(pgidx, idx, 0);
+ /* Unlink PG from alias's chain */
+ ALIAS_LOCK(alias);
+ CK_SLIST_REMOVE(&alias->portgroups, pg,
+ nat64lsn_pg, entries);
+ alias->portgroups_count--;
+ ALIAS_UNLOCK(alias);
+ /* And link to job's chain for deferred destroying */
+ NAT64STAT_INC(&cfg->base.stats, spgdeleted);
+ CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
+ }
} }
-
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
-
- si->pg = pg;
- si->st = st;
- si->sidx_next = st->next;
-
- return (si);
} }
-static struct st_idx *+static void
-st_save_cond(struct st_idx *si_dst, struct st_idx *si)+nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
-{+ struct nat64lsn_hosts_slist *hosts)
- if (si->st != NULL)
- *si_dst = *si;
-
- return (si_dst);
-}
-
-unsigned int
-nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
{ {
- struct st_idx si, si_prev;+ struct nat64lsn_host *host, *tmp;
int i; int i;
- unsigned int delcount;+
-+ for (i = 0; i < cfg->hosts_hashsize; i++) {
- delcount = 0;+ CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
- for (i = 0; i < nh->hsize; i++) {+ entries, tmp) {
- memset(&si_prev, 0, sizeof(si_prev));+ /* Was host marked in a previous call? */
- for (st_first(cfg, nh, &nh->phash[i], &si);+ if (host->flags & NAT64LSN_DEADHOST) {
- si.st != NULL;+ if (host->states_count > 0) {
- st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {+ host->flags &= ~NAT64LSN_DEADHOST;
- if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)+ continue;
+ }
+ /*
+ * Unlink host from hash table and schedule
+ * it for deferred destroying.
+ */
+ CFG_LOCK(cfg);
+ CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
+ nat64lsn_host, entries);
+ cfg->hosts_count--;
+ CFG_UNLOCK(cfg);
+ CK_SLIST_INSERT_HEAD(hosts, host, entries);
+ continue;
+ }
+ if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
+ continue;
+ if (host->states_count > 0)
continue; continue;
- nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",+ /* Mark host as going to be expired in next pass */
- si.st->cur.off);+ host->flags |= NAT64LSN_DEADHOST;
- /* Unlink from hash */+ ck_pr_fence_store();
- if (si_prev.st != NULL)
- si_prev.st->next = si.st->next;
- else
- nh->phash[i] = si.st->next;
- /* Delete state and free its data */
- PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
- memset(si.st, 0, sizeof(struct nat64lsn_state));
- si.st = NULL;
- delcount++;
-
- /* Update portgroup timestamp */
- SET_AGE(si.pg->timestamp);
} }
} }
- NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount);
- return (delcount);
-}
-
-/*
- * Checks if portgroup is not used and can be deleted,
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
-{
-
- if (!PG_IS_EMPTY(pg))
- return (0);
- if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
- return (0);
- return (1);
} }
-/*+static struct nat64lsn_pgchunk*
- * Checks if host record is not used and can be deleted,+nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
{ {
-+#if 0
- if (nh->pg_used != 0)+ struct nat64lsn_alias *alias;
- return (0);+ struct nat64lsn_pgchunk *chunk;
- if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)+ uint32_t pgmask;
- return (0);+ int i, c;
- return (1);+
-}+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
-+ alias = &cfg->aliases[i];
-struct nat64lsn_periodic_data {+ if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
- struct nat64lsn_cfg *cfg;+ continue;
- struct nat64lsn_job_head jhead;+ /* Always keep single chunk allocated */
- int jlen;+ for (c = 1; c < 32; c++) {
-};+ if ((alias->tcp_chunkmask & (1 << c)) == 0)
-+ break;
-static NAT64NOINLINE int+ chunk = ck_pr_load_ptr(&alias->tcp[c]);
-nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
- struct nat64lsn_periodic_data *d)
-{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_job_item *ji;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];
- int delcount, i;
-
- delcount = 0;
- memset(delmask, 0, sizeof(delmask));
-
- if (V_nat64_debug & DP_JQUEUE) {
- char a[INET6_ADDRSTRLEN];
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
- stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
- }
- if (!stale_nh(d->cfg, nh)) {
- /* Non-stale host. Inspect internals */
- NAT64_LOCK(nh);
-
- /* Stage 1: Check&expire states */
- if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
- SET_AGE(nh->timestamp);
-
- /* Stage 2: Check if we need to expire */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
- if (pg == NULL)
continue; continue;
-+ ck_pr_btr_32(&alias->tcp_chunkmask, c);
- /* Check if we can delete portgroup */+ ck_pr_fence_load();
- if (stale_pg(d->cfg, pg) == 0)+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue; continue;
-
- DPRINTF(DP_JQUEUE, "Check PG %d", i);
- delmask[i / 64] |= ((uint64_t)1 << (i % 64));
- delcount++;
} }
-
- NAT64_UNLOCK(nh);
- if (delcount == 0)
- return (0);
} }
+#endif
+ return (NULL);
+}
- DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);+#if 0
- /* We have something to delete - add it to queue */+static void
- ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);+nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
- if (ji == NULL)+{
- return (0);+ struct nat64lsn_host *h;
-+ struct nat64lsn_states_slist *hash;
- ji->haddr = nh->addr;+ int i, j, hsize;
- ji->delcount = delcount;+
- memcpy(ji->delmask, delmask, sizeof(ji->delmask));+ for (i = 0; i < cfg->hosts_hashsize; i++) {
-+ CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
- TAILQ_INSERT_TAIL(&d->jhead, ji, next);+ if (h->states_count / 2 < h->states_hashsize ||
- d->jlen++;+ h->states_hashsize >= NAT64LSN_MAX_HSIZE)
- return (0);+ continue;
+ hsize = h->states_hashsize * 2;
+ hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
+ if (hash == NULL)
+ continue;
+ for (j = 0; j < hsize; j++)
+ CK_SLIST_INIT(&hash[i]);
+
+ ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
+ }
+ }
} }
+#endif
/* /*
* This procedure is used to perform various maintenance * This procedure is used to perform various maintenance
- * on dynamic hash list. Currently it is called every second.+ * on dynamic hash list. Currently it is called every 4 seconds.
*/ */
static void static void
nat64lsn_periodic(void *data) nat64lsn_periodic(void *data)
{ {
- struct ip_fw_chain *ch;+ struct nat64lsn_job_item *ji;
- IPFW_RLOCK_TRACKER;
struct nat64lsn_cfg *cfg; struct nat64lsn_cfg *cfg;
- struct nat64lsn_periodic_data d;
- struct nat64lsn_host *nh, *tmp;
cfg = (struct nat64lsn_cfg *) data; cfg = (struct nat64lsn_cfg *) data;
- ch = cfg->ch;
CURVNET_SET(cfg->vp); CURVNET_SET(cfg->vp);
-+ if (cfg->hosts_count > 0) {
- memset(&d, 0, sizeof(d));+ ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
- d.cfg = cfg;+ if (ji != NULL) {
- TAILQ_INIT(&d.jhead);+ ji->jtype = JTYPE_DESTROY;
-+ CK_SLIST_INIT(&ji->hosts);
- IPFW_RLOCK(ch);+ CK_SLIST_INIT(&ji->portgroups);
-+ nat64lsn_expire_hosts(cfg, &ji->hosts);
- /* Stage 1: foreach host, check all its portgroups */+ nat64lsn_expire_portgroups(cfg, &ji->portgroups);
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);+ ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
-+ NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
- /* Enqueue everything we have requested */+ nat64lsn_job_destroy);
- nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);+ } else
-+ NAT64STAT_INC(&cfg->base.stats, jnomem);
+ }
callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
-
- IPFW_RUNLOCK(ch);
-
CURVNET_RESTORE(); CURVNET_RESTORE();
} }
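nat64lsn_periodic() above never frees hosts or portgroups directly: expired objects are unlinked, collected on the job item, and handed to epoch_call() through NAT64LSN_EPOCH_CALL(), so the destroy callback runs only after every reader that might still hold a pointer has left the network epoch. A minimal sketch of that retire pattern (struct thing and its names are invented for illustration):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/epoch.h>
#include <sys/ck.h>

struct thing {
	CK_SLIST_ENTRY(thing)	entries;
	struct epoch_context	epoch_ctx;
};

static void
thing_destroy_cb(epoch_context_t ctx)
{
	struct thing *t;

	t = __containerof(ctx, struct thing, epoch_ctx);
	free(t, M_TEMP);
}

static void
thing_retire(struct thing *t)
{
	/* Caller has already unlinked t from every list. */
	epoch_call(net_epoch_preempt, &t->epoch_ctx, thing_destroy_cb);
}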
-static NAT64NOINLINE void+#define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
-reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)+#define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
-{+#define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
-+static int
- if (ji->m == NULL)+nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
- return;
-
- /* Request has failed or packet type is wrong */
- if (ji->f_id.addr_type != 6 || ji->done == 0) {
- m_freem(ji->m);
- ji->m = NULL;
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
- ji->jtype, ji->done);
- return;
- }
-
- /*
- * XXX: Limit recursion level
- */
-
- NAT64STAT_INC(&cfg->base.stats, jreinjected);
- DPRINTF(DP_JQUEUE, "Reinject mbuf");
- nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
-}
-
-static void
-destroy_portgroup(struct nat64lsn_portgroup *pg)
-{
-
- DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
- uma_zfree(nat64lsn_pg_zone, pg);
-}
-
-static NAT64NOINLINE int
-alloc_portgroup(struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_portgroup *pg;
-
- pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
- if (pg == NULL)
- return (1);
-
- if (ji->needs_idx != 0) {
- ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- /* Failed alloc isn't always fatal, so don't check */
- }
- memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
- pg->nat_proto = ji->nat_proto;
- ji->pg = pg;
- return (0);
-
-}
-
-static void
-destroy_host6(struct nat64lsn_host *nh)
{ {
char a[INET6_ADDRSTRLEN]; char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_host *host;
+ struct nat64lsn_state *state;
+ uint32_t hval, data[2];
int i; int i;
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));+ /* Check that host was not yet added. */
- DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,+ NAT64LSN_EPOCH_ASSERT();
- nh->pg_used);+ CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
- NAT64_LOCK_DESTROY(nh);+ if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
- for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)+ /* The host was allocated in previous call. */
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));+ ji->host = host;
- uma_zfree(nat64lsn_host_zone, nh);+ goto get_state;
-}+ }
-
-static NAT64NOINLINE int
-alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_host *nh;
- char a[INET6_ADDRSTRLEN];
-
- nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
- if (nh == NULL)
- return (1);
- PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- if (PORTGROUP_CHUNK(nh, 0) == NULL) {
- uma_zfree(nat64lsn_host_zone, nh);
- return (2);
- }
- if (alloc_portgroup(ji) != 0) {
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
- uma_zfree(nat64lsn_host_zone, nh);
- return (3);
} }
- NAT64_LOCK_INIT(nh);+ host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
- nh->addr = ji->haddr;+ if (ji->host == NULL)
- nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */+ return (HOST_ERROR(1));
- nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
- nh->pg_used = 0;
- ji->nh = nh;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
- return (0);
-}
-
-/*
- * Finds free @pg index inside @nh
- */
-static NAT64NOINLINE int
-find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
-{
- int i;
- for (i = 0; i < nh->pg_allocated; i++) {+ host->states_hashsize = NAT64LSN_HSIZE;
- if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {+ host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
- *idx = i;+ host->states_hashsize, M_NAT64LSN, M_NOWAIT);
- return (0);+ if (host->states_hash == NULL) {
- }+ uma_zfree(nat64lsn_host_zone, host);
+ return (HOST_ERROR(2));
} }
- return (1);
-}
-static NAT64NOINLINE int+ link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
-attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)+ if (link == NULL) {
-{+ free(host->states_hash, M_NAT64LSN);
- char a[INET6_ADDRSTRLEN];+ uma_zfree(nat64lsn_host_zone, host);
- struct nat64lsn_host *nh;+ return (HOST_ERROR(3));
-
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL) {
- /* Add new host to list */
- nh = ji->nh;
- I6HASH_INSERT(cfg, nh);
- cfg->ihcount++;
- ji->nh = NULL;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
- /*
- * Try to add portgroup.
- * Note it will automatically set
- * 'done' on ji if successful.
- */
- if (attach_portgroup(cfg, ji) != 0) {
- DPRINTF(DP_DROPS, "%s %p failed to attach PG",
- a, nh);
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- return (1);
- }
- return (0);
} }
+ /* Initialize */
+ HOST_LOCK_INIT(host);
+ SET_AGE(host->timestamp);
+ host->addr = ji->f_id.src_ip6;
+ host->hval = ji->src6_hval;
+ host->flags = 0;
+ host->states_count = 0;
+ host->states_hashsize = NAT64LSN_HSIZE;
+ CK_SLIST_INIT(&host->aliases);
+ for (i = 0; i < host->states_hashsize; i++)
+ CK_SLIST_INIT(&host->states_hash[i]);
+
+ /* Determine alias from flow hash. */
+ hval = ALIASLINK_HVAL(cfg, &ji->f_id);
+ link->alias = &ALIAS_BYHASH(cfg, hval);
+ CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
+ link->alias->hosts_count++;
+ ALIAS_UNLOCK(link->alias);
+
+ CFG_LOCK(cfg);
+ CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
+ cfg->hosts_count++;
+ CFG_UNLOCK(cfg);
+
+get_state:
+ data[0] = ji->faddr;
+ data[1] = (ji->f_id.dst_port << 16) | ji->port;
+ ji->state_hval = hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
+ ji->faddr, ji->port, ji->proto);
 	/*
-	 * nh isn't NULL. This probably means we had several simultaneous
-	 * host requests. The previous one request has already attached
-	 * this host. Requeue attached mbuf and mark job as done, but
-	 * leave nh and pg pointers not changed, so nat64lsn_do_request()
-	 * will release all allocated resources.
+	 * We failed to obtain new state, used alias needs new PG.
+	 * XXX: or another alias should be used.
 	 */
-	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
-	DPRINTF(DP_OBJ, "%s %p is already attached as %p",
-	    a, ji->nh, nh);
+	if (state == NULL) {
+		/* Try to allocate new PG */
+		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+			return (HOST_ERROR(4));
+		/* We assume that nat64lsn_alloc_pg() got state */
+	} else
+		ji->state = state;
+
 	ji->done = 1;
-	return (0);
+	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
+	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
+	return (HOST_ERROR(0));
 }
-static NAT64NOINLINE int
-find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
-    int nat_proto, uint16_t *aport, int *ppg_idx)
+static int
+nat64lsn_find_pg_place(uint32_t *data)
 {
-	int j, pg_idx;
-
-	pg_idx = addr_off * _ADDR_PG_COUNT +
-	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
-	for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
-		if (cfg->pg[pg_idx + j] != NULL)
+	int i;
+
+	for (i = 0; i < 32; i++) {
+		if (~data[i] == 0)
 			continue;
-
+		return (i * 32 + ffs(~data[i]) - 1);
- *aport = j * NAT64_CHUNK_SIZE;
- *ppg_idx = pg_idx + j;
- return (1);
- }
-
- return (0);
-}
-
-/*
- * XXX: This function needs to be rewritten to
- * use free bitmask for faster pg finding,
- * additionally, it should take into consideration
- * a) randomization and
- * b) previous addresses allocated to given nat instance
- *
- */
-static NAT64NOINLINE int
-find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
- uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
-{
- int i, nat_proto;
-
- /*
- * XXX: Use bitmask index to be able to find/check if IP address
- * has some spare pg's
- */
- nat_proto = ji->nat_proto;
-
- /* First, try to use same address */
- if (ji->aaddr != 0) {
- i = ntohl(ji->aaddr) - cfg->prefix4;
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
- }
- }
-
- /* Next, try to use random address based on flow hash */
- i = ji->fhash % (1 << (32 - cfg->plen4));
- if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
- }
-
-
- /* Last one: simply find ANY available */
- for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
- }
 	}
-
-	return (1);
+	return (-1);
 }
-static NAT64NOINLINE int
-attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static int
+nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias, uint32_t *chunkmask,
+ uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
+ struct nat64lsn_pg **pgptr, uint8_t proto)
 {
-	char a[INET6_ADDRSTRLEN];
-	struct nat64lsn_portgroup *pg;
-	struct nat64lsn_host *nh;
-	uint32_t aaddr;
-	uint16_t aport;
-	int nh_pg_idx, pg_idx;
-
-	pg = ji->pg;
-
-	/*
-	 * Find source host and bind: we can't rely on
-	 * pg->host
-	 */
-	I6HASH_FIND(cfg, nh, &ji->haddr);
-	if (nh == NULL)
-		return (1);
+	struct nat64lsn_pg *pg;
+	int i, pg_idx, chunk_idx;
+
+	/* Find place in pgchunk where PG can be added */
+	pg_idx = nat64lsn_find_pg_place(pgmask);
+	if (pg_idx < 0)	/* no more PGs */
+		return (PG_ERROR(1));
+	/* Check that we have allocated pgchunk for given PG index */
+	chunk_idx = pg_idx / 32;
+	if (!ISSET32(*chunkmask, chunk_idx)) {
+		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
+		    M_NOWAIT);
+		if (chunks[chunk_idx] == NULL)
+			return (PG_ERROR(2));
+		ck_pr_bts_32(chunkmask, chunk_idx);
+		ck_pr_fence_store();
-
- /* Find spare port chunk */
- if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
- return (2);
- }
-
- /* Expand PG indexes if needed */
- if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
- PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
- ji->spare_idx;
- nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
- ji->spare_idx = NULL;
 	}
-
-	/* Find empty index to store PG in the @nh */
-	if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
-		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
-		DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
-		    a);
-		return (3);
+	/* Allocate PG and states chunks */
+	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
+	if (pg == NULL)
+		return (PG_ERROR(3));
+	pg->chunks_count = cfg->states_chunks;
+	if (pg->chunks_count > 1) {
+		pg->freemask_chunk = malloc(pg->chunks_count *
+ sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
+ if (pg->freemask_chunk == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(4));
+ }
+ pg->states_chunk = malloc(pg->chunks_count *
+ sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
+ M_NOWAIT | M_ZERO);
+ if (pg->states_chunk == NULL) {
+ free(pg->freemask_chunk, M_NAT64LSN);
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(5));
+ }
+ for (i = 0; i < pg->chunks_count; i++) {
+ pg->states_chunk[i] = uma_zalloc(
+ nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states_chunk[i] == NULL)
+ goto states_failed;
+ }
+ memset(pg->freemask_chunk, 0xff,
+ sizeof(uint64_t) * pg->chunks_count);
+ } else {
+ pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(6));
+ }
+ memset(&pg->freemask64, 0xff, sizeof(uint64_t));
 	}
-	cfg->pg[pg_idx] = pg;
+	/* Initialize PG and hook it to pgchunk */
- cfg->protochunks[pg->nat_proto]++;
- NAT64STAT_INC(&cfg->base.stats, spgcreated);
-
- pg->aaddr = aaddr;
- pg->aport = aport;
- pg->host = nh;
- pg->idx = pg_idx;
 	SET_AGE(pg->timestamp);
+ pg->proto = proto;
+ pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
+ ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
+ ck_pr_fence_store();
+ ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
+ ck_pr_store_ptr(pgptr, pg);
+
+ ALIAS_LOCK(alias);
+ CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
+ SET_AGE(alias->timestamp);
+ alias->portgroups_count++;
+ ALIAS_UNLOCK(alias);
+ NAT64STAT_INC(&cfg->base.stats, spgcreated);
+ return (PG_ERROR(0));
-	PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
-	if (nh->pg_used == nh_pg_idx)
-		nh->pg_used++;
-	SET_AGE(nh->timestamp);
-
-	ji->pg = NULL;
-	ji->done = 1;
-
-	return (0);
+states_failed:
+	for (i = 0; i < pg->chunks_count; i++)
+		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
+	free(pg->freemask_chunk, M_NAT64LSN);
+	free(pg->states_chunk, M_NAT64LSN);
+	uma_zfree(nat64lsn_pg_zone, pg);
+	return (PG_ERROR(7));
 }
-static NAT64NOINLINE void
-consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static int
+nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
 {
-	struct nat64lsn_host *nh, *nh_tmp;
-	struct nat64lsn_portgroup *pg, *pg_list[256];
-	int i, pg_lidx, idx;
+	struct nat64lsn_aliaslink *link;
+	struct nat64lsn_alias *alias;
+	int ret;
-
- /* Find source host */
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL || nh->pg_used == 0)
- return;
-
- memset(pg_list, 0, sizeof(pg_list));
- pg_lidx = 0;
-
- NAT64_LOCK(nh);
-
- for (i = nh->pg_used - 1; i >= 0; i--) {
- if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
- continue;
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
-
- /* Check that PG isn't busy. */
- if (stale_pg(cfg, pg) == 0)
- continue;
-
- /* DO delete */
- pg_list[pg_lidx++] = pg;
- PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
-		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
-		    pg->aport);
-		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
-		cfg->pg[idx] = NULL;
-		cfg->protochunks[pg->nat_proto]--;
-		NAT64STAT_INC(&cfg->base.stats, spgdeleted);
-		/* Decrease pg_used */
-		while (nh->pg_used > 0 &&
-		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
-			nh->pg_used--;
-		/* Check if on-stack buffer has ended */
-		if (pg_lidx == nitems(pg_list))
-			break;
+	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
+	if (link == NULL)
+		return (PG_ERROR(1));
+	/*
+	 * TODO: check that we did not already allocated PG in
+	 * previous call.
+	 */
+	ret = 0;
+	alias = link->alias;
+	/* Find place in pgchunk where PG can be added */
+ switch (ji->proto) {
+ case IPPROTO_TCP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->tcp_chunkmask, alias->tcp_pgmask,
+ alias->tcp, &alias->tcp_pg, ji->proto);
+ break;
+ case IPPROTO_UDP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->udp_chunkmask, alias->udp_pgmask,
+ alias->udp, &alias->udp_pg, ji->proto);
+ break;
+ case IPPROTO_ICMP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->icmp_chunkmask, alias->icmp_pgmask,
+ alias->icmp, &alias->icmp_pg, ji->proto);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, ji->proto);
 	}
-
-	NAT64_UNLOCK(nh);
-
-	if (stale_nh(cfg, nh)) {
-		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
-		KASSERT(nh != NULL, ("Unable to find address"));
-		cfg->ihcount--;
-		ji->nh = nh;
-		I6HASH_FIND(cfg, nh, &ji->haddr);
-		KASSERT(nh == NULL, ("Failed to delete address"));
+	if (ret == PG_ERROR(1)) {
+		/*
+		 * PG_ERROR(1) means that alias lacks free PGs
+		 * XXX: try next alias.
+		 */
+		printf("NAT64LSN: %s: failed to obtain PG\n",
+		    __func__);
+		return (ret);
 	}
-
-	/* TODO: Delay freeing portgroups */
-	while (pg_lidx > 0) {
-		pg_lidx--;
-		NAT64STAT_INC(&cfg->base.stats, spgdeleted);
-		destroy_portgroup(pg_list[pg_lidx]);
+	if (ret == PG_ERROR(0)) {
+		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
+		    ji->state_hval, ji->faddr, ji->port, ji->proto);
+		if (ji->state == NULL)
+			ret = PG_ERROR(8);
+		else
+			ji->done = 1;
 	}
+	return (ret);
 }
-/*
- * Main request handler.
+static void
+nat64lsn_do_request(void *data)
- * Responsible for handling jqueue, e.g.
- * creating new hosts, addind/deleting portgroups.
- */
-static NAT64NOINLINE void
-nat64lsn_do_request(void *data)
 {
-	IPFW_RLOCK_TRACKER;
+	struct epoch_tracker et;
 	struct nat64lsn_job_head jhead;
-	struct nat64lsn_job_item *ji;
-	int jcount, nhsize;
-	struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
-	struct ip_fw_chain *ch;
+	struct nat64lsn_job_item *ji, *ji2;
+	struct nat64lsn_cfg *cfg;
+	int jcount;
+	uint8_t flags;
- int delcount;
-
- CURVNET_SET(cfg->vp);
-
- TAILQ_INIT(&jhead);
-	/* XXX: We're running unlocked here */
-
-	ch = cfg->ch;
-	delcount = 0;
-	IPFW_RLOCK(ch);
+	cfg = (struct nat64lsn_cfg *)data;
+	if (cfg->jlen == 0)
+		return;
+	CURVNET_SET(cfg->vp);
+	STAILQ_INIT(&jhead);
 	/* Grab queue */
 	JQUEUE_LOCK();
-	TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
 	jcount = cfg->jlen;
 	cfg->jlen = 0;
 	JQUEUE_UNLOCK();
-	/* check if we need to resize hash */
+	/* TODO: check if we need to resize hash */
- nhsize = 0;
- if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
- ;
- } else if (cfg->ihcount < cfg->ihsize * 4) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
- ;
- }
-
- IPFW_RUNLOCK(ch);
-
- if (TAILQ_EMPTY(&jhead)) {
- CURVNET_RESTORE();
- return;
- }
 	NAT64STAT_INC(&cfg->base.stats, jcalls);
 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
@@ -1169,442 +1228,283 @@ nat64lsn_do_request(void *data)
 	 * TODO: Limit per-call number of items
 	 */
-	/* Pre-allocate everything for entire chain */
-	TAILQ_FOREACH(ji, &jhead, next) {
+	NAT64LSN_EPOCH_ENTER(et);
+	STAILQ_FOREACH(ji, &jhead, entries) {
 		switch (ji->jtype) {
-		case JTYPE_NEWHOST:
-			if (alloc_host6(cfg, ji) != 0)
-				NAT64STAT_INC(&cfg->base.stats,
-				    jhostfails);
-			break;
-		case JTYPE_NEWPORTGROUP:
-			if (alloc_portgroup(ji) != 0)
-				NAT64STAT_INC(&cfg->base.stats,
-				    jportfails);
-			break;
-		case JTYPE_DELPORTGROUP:
-			delcount += ji->delcount;
-			break;
-		default:
-			break;
+		case JTYPE_NEWHOST:
+			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
+				NAT64STAT_INC(&cfg->base.stats, jhostfails);
+			break;
+		case JTYPE_NEWPORTGROUP:
+			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+				NAT64STAT_INC(&cfg->base.stats, jportfails);
+			break;
+		default:
+			continue;
 		}
-	}
-
-	/*
-	 * TODO: Alloc hew hash
-	 */
-	nhsize = 0;
+		if (ji->done != 0) {
+			flags = ji->proto != IPPROTO_TCP ? 0 :
+			    convert_tcp_flags(ji->f_id._flags);
+			nat64lsn_translate6_internal(cfg, &ji->m,
+			    ji->state, flags);
+			NAT64STAT_INC(&cfg->base.stats, jreinjected);
- if (nhsize > 0) {
- /* XXX: */
- }
-
- /* Apply all changes in batch */
- IPFW_UH_WLOCK(ch);
- IPFW_WLOCK(ch);
-
- TAILQ_FOREACH(ji, &jhead, next) {
- switch (ji->jtype) {
- case JTYPE_NEWHOST:
- if (ji->nh != NULL)
- attach_host6(cfg, ji);
- break;
- case JTYPE_NEWPORTGROUP:
- if (ji->pg != NULL &&
- attach_portgroup(cfg, ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jportfails);
- break;
- case JTYPE_DELPORTGROUP:
- consider_del_portgroup(cfg, ji);
- break;
 		}
 	}
+ NAT64LSN_EPOCH_EXIT(et);
-	if (nhsize > 0) {
-		/* XXX: Move everything to new hash */
-	}
-
-	IPFW_WUNLOCK(ch);
-	IPFW_UH_WUNLOCK(ch);
-
-	/* Flush unused entries */
-	while (!TAILQ_EMPTY(&jhead)) {
-		ji = TAILQ_FIRST(&jhead);
+	ji = STAILQ_FIRST(&jhead);
+	while (ji != NULL) {
+		ji2 = STAILQ_NEXT(ji, entries);
+		/*
+		 * In any case we must free mbuf if
+		 * translator did not consumed it.
+		 */
+		m_freem(ji->m);
+		uma_zfree(nat64lsn_job_zone, ji);
+		ji = ji2;
- TAILQ_REMOVE(&jhead, ji, next);
- if (ji->nh != NULL)
- destroy_host6(ji->nh);
- if (ji->pg != NULL)
- destroy_portgroup(ji->pg);
- if (ji->m != NULL)
- reinject_mbuf(cfg, ji);
- if (ji->spare_idx != NULL)
- uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
- free(ji, M_IPFW);
 	}
 	CURVNET_RESTORE();
 }
-static NAT64NOINLINE struct nat64lsn_job_item *
-nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
-    int jtype)
+static struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
 {
 	struct nat64lsn_job_item *ji;
- struct in6_addr haddr;
- uint8_t nat_proto;
 	/*
-	 * Do not try to lock possibly contested mutex if we're near the limit.
-	 * Drop packet instead.
+	 * Do not try to lock possibly contested mutex if we're near the
+	 * limit. Drop packet instead.
 	 */
-	if (cfg->jlen >= cfg->jmaxlen) {
+	ji = NULL;
+	if (cfg->jlen >= cfg->jmaxlen)
 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
-		return (NULL);
-	}
-
-	memset(&haddr, 0, sizeof(haddr));
+	else {
+		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
+		if (ji == NULL)
+			NAT64STAT_INC(&cfg->base.stats, jnomem);
- nat_proto = 0;
- if (f_id != NULL) {
- haddr = f_id->src_ip6;
- nat_proto = nat64lsn_proto_map[f_id->proto];
-
- DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
- nat_proto, f_id->proto);
-
- if (nat_proto == 0)
- return (NULL);
 	}
-
- ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
- M_NOWAIT | M_ZERO);
-
 	if (ji == NULL) {
-		NAT64STAT_INC(&cfg->base.stats, jnomem);
-		return (NULL);
-	}
-
-	ji->jtype = jtype;
-
-	if (f_id != NULL) {
-		ji->f_id = *f_id;
-		ji->haddr = haddr;
-		ji->nat_proto = nat_proto;
+		NAT64STAT_INC(&cfg->base.stats, dropped);
+		DPRINTF(DP_DROPS, "failed to create job");
+	} else {
+		ji->jtype = jtype;
+		ji->done = 0;
 	}
-
 	return (ji);
} }
-static NAT64NOINLINE void+static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{ {
- if (ji == NULL)
- return;
-
JQUEUE_LOCK(); JQUEUE_LOCK();
- TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);+ STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
- cfg->jlen++;
NAT64STAT_INC(&cfg->base.stats, jrequests); NAT64STAT_INC(&cfg->base.stats, jrequests);
+ cfg->jlen++;
if (callout_pending(&cfg->jcallout) == 0) if (callout_pending(&cfg->jcallout) == 0)
callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
JQUEUE_UNLOCK(); JQUEUE_UNLOCK();
} }
-static NAT64NOINLINE void+static void
-nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,+nat64lsn_job_destroy(epoch_context_t ctx)
- struct nat64lsn_job_head *jhead, int jlen)
-{
-
- if (TAILQ_EMPTY(jhead))
- return;
-
- /* Attach current queue to execution one */
- JQUEUE_LOCK();
- TAILQ_CONCAT(&cfg->jhead, jhead, next);
- cfg->jlen += jlen;
- NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen);
-
- if (callout_pending(&cfg->jcallout) == 0)
- callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
- JQUEUE_UNLOCK();
-}
-
-static unsigned int
-flow6_hash(const struct ipfw_flow_id *f_id)
{ {
- unsigned char hbuf[36];+ struct nat64lsn_job_item *ji;
-+ struct nat64lsn_host *host;
- memcpy(hbuf, &f_id->dst_ip6, 16);+ struct nat64lsn_pg *pg;
- memcpy(&hbuf[16], &f_id->src_ip6, 16);+ int i;
- memcpy(&hbuf[32], &f_id->dst_port, 2);
- memcpy(&hbuf[32], &f_id->src_port, 2);
- return (djb_hash(hbuf, sizeof(hbuf)));+ ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
+ MPASS(ji->jtype == JTYPE_DESTROY);
+ while (!CK_SLIST_EMPTY(&ji->hosts)) {
+ host = CK_SLIST_FIRST(&ji->hosts);
+ CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
+ if (host->states_count > 0) {
+ /*
+ * XXX: The state has been created
+ * during host deletion.
+ */
+ printf("NAT64LSN: %s: destroying host with %d "
+ "states\n", __func__, host->states_count);
+ }
+ nat64lsn_destroy_host(host);
+ }
+ while (!CK_SLIST_EMPTY(&ji->portgroups)) {
+ pg = CK_SLIST_FIRST(&ji->portgroups);
+ CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
+ for (i = 0; i < pg->chunks_count; i++) {
+ if (FREEMASK_BITCOUNT(pg, i) != 64) {
+ /*
+ * XXX: The state has been created during
+ * PG deletion.
+ */
+ printf("NAT64LSN: %s: destroying PG %p "
+ "with non-empty chunk %d\n", __func__,
+ pg, i);
+ }
+ }
+ nat64lsn_destroy_pg(pg);
+ }
+ uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
+ uma_zfree(nat64lsn_job_zone, ji);
} }
-static NAT64NOINLINE int+static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg, nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm)+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{ {
struct nat64lsn_job_item *ji; struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;+ ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
- *pm = NULL;+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->src6_hval = hval;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
nat64lsn_enqueue_job(cfg, ji); nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jhostsreq); NAT64STAT_INC(&cfg->base.stats, jhostsreq);
+ *mp = NULL;
} }
-
return (IP_FW_DENY); return (IP_FW_DENY);
} }
-static NAT64NOINLINE int+static int
-nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,+nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
- int needs_idx)+ in_addr_t faddr, uint16_t port, uint8_t proto)
{ {
struct nat64lsn_job_item *ji; struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;+ ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
- *pm = NULL;+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->state_hval = hval;
+ ji->host = host;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
- ji->aaddr = aaddr;
- ji->needs_idx = needs_idx;
nat64lsn_enqueue_job(cfg, ji); nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jportreq); NAT64STAT_INC(&cfg->base.stats, jportreq);
+ *mp = NULL;
} }
-
return (IP_FW_DENY); return (IP_FW_DENY);
} }
-static NAT64NOINLINE struct nat64lsn_state * +static int
-nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,+nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
- int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)+ struct nat64lsn_state *state, uint8_t flags)
{ {
- struct nat64lsn_portgroup *pg;+ struct pfloghdr loghdr, *logdata;
- struct nat64lsn_state *st;+ int ret;
- int i, hval, off;+ uint16_t ts;
-
- /* XXX: create additional bitmask for selecting proper portgroup */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
- if (pg == NULL)
- continue;
- if (*aaddr == 0)
- *aaddr = pg->aaddr;
- if (pg->nat_proto != nat_proto)
- continue;
-
- off = PG_GET_FREE_IDX(pg);
- if (off != 0) {
- /* We have found spare state. Use it */
- off--;
- PG_MARK_BUSY_IDX(pg, off);
- st = &pg->states[off];
-
- /*
- * Fill in new info. Assume state was zeroed.
- * Timestamp and flags will be filled by caller.
- */
- st->u.s = kst->u.s;
- st->cur.idx = i + 1;
- st->cur.off = off;
-
- /* Insert into host hash table */
- hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
- st->next = nh->phash[hval];
- nh->phash[hval] = st->cur;
-
- nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
- NAT64STAT_INC(&cfg->base.stats, screated);+ /* Update timestamp and flags if needed */
+ SET_AGE(ts);
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != 0)
+ state->flags |= flags;
- return (st);+ if (cfg->base.flags & NAT64_LOG) {
- }+ logdata = &loghdr;
- /* Saev last used alias affress */+ nat64lsn_log(logdata, *mp, AF_INET6, state);
- *aaddr = pg->aaddr;+ } else
- }+ logdata = NULL;
- return (NULL);+ ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
+ htons(state->aport), &cfg->base, logdata);
+ if (ret == NAT64SKIP)
+ return (cfg->nomatch_verdict);
+ if (ret == NAT64RETURN)
+ *mp = NULL;
+ return (IP_FW_DENY);
} }
-static NAT64NOINLINE int+static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
- struct mbuf **pm)+ struct mbuf **mp)
{ {
- struct pfloghdr loghdr, *logdata;+ struct nat64lsn_state *state;
- char a[INET6_ADDRSTRLEN];+ struct nat64lsn_host *host;
- struct nat64lsn_host *nh;
- struct st_ptr sidx;
- struct nat64lsn_state *st, kst;
- struct nat64lsn_portgroup *pg;
struct icmp6_hdr *icmp6; struct icmp6_hdr *icmp6;
- uint32_t aaddr;+ uint32_t addr, hval, data[2];
- int action, hval, nat_proto, proto;+ int offset, proto;
- uint16_t aport, state_ts, state_flags;+ uint16_t port;
-+ uint8_t flags;
- /* Check if af/protocol is supported and get it short id */+
- nat_proto = nat64lsn_proto_map[f_id->proto];+ /* Check if protocol is supported */
- if (nat_proto == 0) {+ port = f_id->src_port;
+ proto = f_id->proto;
+ switch (f_id->proto) {
+ case IPPROTO_ICMPV6:
/* /*
- * Since we can be called from jobs handler, we need+ * For ICMPv6 echo reply/request we use icmp6_id as
- * to free mbuf by self, do not leave this task to+ * local port.
- * ipfw_check_packet().
*/ */
+ offset = 0;
+ proto = nat64_getlasthdr(*mp, &offset);
+ if (proto < 0) {
+ NAT64STAT_INC(&cfg->base.stats, dropped);
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ return (IP_FW_DENY);
+ }
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(*mp, offset);
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+ port = ntohs(icmp6->icmp6_id);
+ }
+ proto = IPPROTO_ICMP;
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
NAT64STAT_INC(&cfg->base.stats, noproto); NAT64STAT_INC(&cfg->base.stats, noproto);
- goto drop;+ return (cfg->nomatch_verdict);
} }
- /* Try to find host first */+ /* Extract IPv4 from destination IPv6 address */
- I6HASH_FIND(cfg, nh, &f_id->src_ip6);+ addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
+ if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
+ char a[INET_ADDRSTRLEN];
- if (nh == NULL)
- return (nat64lsn_request_host(cfg, f_id, pm));
-
- /* Fill-in on-stack state structure */
- kst.u.s.faddr = nat64_extract_ip4(&f_id->dst_ip6,
- cfg->base.plat_plen);
- if (kst.u.s.faddr == 0 ||
- nat64_check_private_ip4(&cfg->base, kst.u.s.faddr) != 0) {
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- kst.u.s.fport = f_id->dst_port;
- kst.u.s.lport = f_id->src_port;
-
- /* Prepare some fields we might need to update */
- hval = 0;
- proto = nat64_getlasthdr(*pm, &hval);
- if (proto < 0) {
NAT64STAT_INC(&cfg->base.stats, dropped); NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");+ DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
- goto drop;+ inet_ntop(AF_INET, &addr, a, sizeof(a)));
+ return (IP_FW_DENY); /* XXX: add extra stats? */
} }
- SET_AGE(state_ts);+ /* Try to find host */
- if (proto == IPPROTO_TCP)+ hval = HOST_HVAL(cfg, &f_id->src_ip6);
- state_flags = convert_tcp_flags(+ CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
- TCP(mtodo(*pm, hval))->th_flags);+ if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
- else
- state_flags = 0;
- if (proto == IPPROTO_ICMPV6) {
- /* Alter local port data */
- icmp6 = mtodo(*pm, hval);
- if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
- icmp6->icmp6_type == ICMP6_ECHO_REPLY)
- kst.u.s.lport = ntohs(icmp6->icmp6_id);
- }
-
- hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
- pg = NULL;
- st = NULL;
-
- /* OK, let's find state in host hash */
- NAT64_LOCK(nh);
- sidx = nh->phash[hval];
- int k = 0;
- while (sidx.idx != 0) {
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
- //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
- //st->next.idx, st->next.off);
- if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
break; break;
- if (k++ > 1000) {
- DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
- sidx.idx, sidx.off, st->next.idx, st->next.off);
- DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)),
- nh, curcpu);
- k = 0;
- }
- sidx = st->next;
- }
-
- if (sidx.idx == 0) {
- aaddr = 0;
- st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
- if (st == NULL) {
- /* No free states. Request more if we can */
- if (nh->pg_used >= cfg->max_chunks) {
- /* Limit reached */
- DPRINTF(DP_DROPS, "PG limit reached "
- " for host %s (used %u, allocated %u, "
- "limit %u)", inet_ntop(AF_INET6,
- &nh->addr, a, sizeof(a)),
- nh->pg_used * NAT64_CHUNK_SIZE,
- nh->pg_allocated * NAT64_CHUNK_SIZE,
- cfg->max_chunks * NAT64_CHUNK_SIZE);
- NAT64_UNLOCK(nh);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- if ((nh->pg_allocated <=
- nh->pg_used + NAT64LSN_REMAININGPG) &&
- nh->pg_allocated < cfg->max_chunks)
- action = 1; /* Request new indexes */
- else
- action = 0;
- NAT64_UNLOCK(nh);
- //DPRINTF("No state, unlock for %p", nh);
- return (nat64lsn_request_portgroup(cfg, f_id,
- pm, aaddr, action));
- }
-
- /* We've got new state. */
- sidx = st->cur;
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- }
-
- /* Okay, state found */
-
- /* Update necessary fileds */
- if (st->timestamp != state_ts)
- st->timestamp = state_ts;
- if ((st->flags & state_flags) != 0)
- st->flags |= state_flags;
-
- /* Copy needed state data */
- aaddr = pg->aaddr;
- aport = htons(pg->aport + sidx.off);
-
- NAT64_UNLOCK(nh);
-
- if (cfg->base.flags & NAT64_LOG) {
- logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
- } else
- logdata = NULL;
-
- action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata);
- if (action == NAT64SKIP)
- return (cfg->nomatch_verdict);
- if (action == NAT64MFREE) {
-drop:
- m_freem(*pm);
} }
- *pm = NULL; /* mark mbuf as consumed */+ /* We use IPv4 address in host byte order */
- return (IP_FW_DENY);+ addr = ntohl(addr);
+ if (host == NULL)
+ return (nat64lsn_request_host(cfg, f_id, mp,
+ hval, addr, port, proto));
+
+ flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
+
+ data[0] = addr;
+ data[1] = (f_id->dst_port << 16) | port;
+ hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
+ port, proto);
+ if (state == NULL)
+ return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
+ port, proto));
+ return (nat64lsn_translate6_internal(cfg, mp, state, flags));
} }
/* /*
@@ -1614,49 +1514,58 @@ int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done) ipfw_insn *cmd, int *done)
{ {
- ipfw_insn *icmd;
struct nat64lsn_cfg *cfg; struct nat64lsn_cfg *cfg;
+ ipfw_insn *icmd;
int ret; int ret;
IPFW_RLOCK_ASSERT(ch); IPFW_RLOCK_ASSERT(ch);
-	*done = 1;	/* terminate the search */
+	*done = 0;	/* continue the search in case of failure */
icmd = cmd + 1; icmd = cmd + 1;
if (cmd->opcode != O_EXTERNAL_ACTION || if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64lsn_eid || cmd->arg1 != V_nat64lsn_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE || icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(ch, icmd)) == NULL) (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
-		return (0);
+		return (IP_FW_DENY);
+
+ *done = 1; /* terminate the search */
switch (args->f_id.addr_type) { switch (args->f_id.addr_type) {
case 4: case 4:
ret = nat64lsn_translate4(cfg, &args->f_id, &args->m); ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
break; break;
case 6: case 6:
+ /*
+ * Check that destination IPv6 address matches our prefix6.
+ */
+ if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
+ memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
+ cfg->base.plat_plen / 8) != 0) {
+ ret = cfg->nomatch_verdict;
+ break;
+ }
ret = nat64lsn_translate6(cfg, &args->f_id, &args->m); ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
break; break;
default: default:
-		return (cfg->nomatch_verdict);
+		ret = cfg->nomatch_verdict;
} }
- return (ret);
-}
-
-static int
-nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
-{
- struct nat64lsn_host *nh;
-	nh = (struct nat64lsn_host *)mem;
-	memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
-	memset(nh->phash, 0, sizeof(nh->phash));
-	return (0);
+	if (ret != IP_FW_PASS && args->m != NULL) {
+		m_freem(args->m);
+		args->m = NULL;
+	}
+	return (ret);
 }
static int static int
-nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
 {
-	memset(mem, 0, size);
+	struct nat64lsn_states_chunk *chunk;
+	int i;
+	chunk = (struct nat64lsn_states_chunk *)mem;
+	for (i = 0; i < 64; i++)
+		chunk->state[i].flags = 0;
 	return (0);
} }
@@ -1664,109 +1573,181 @@ void
nat64lsn_init_internal(void) nat64lsn_init_internal(void)
{ {
- memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));+ nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
- /* Set up supported protocol map */+ sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
- nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;+ UMA_ALIGN_PTR, 0);
- nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;+ nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
- nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;+ sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
- nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
- /* Fill in reverse proto map */
- memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
- nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
- nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
- nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
-
- JQUEUE_LOCK_INIT();
- nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
- sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
- nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
- sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0); UMA_ALIGN_PTR, 0);
- nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",+ nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
- sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,+ sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
- nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);+ UMA_ALIGN_PTR, 0);
+ nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
+ sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
+ sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
+ NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
+ sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ JQUEUE_LOCK_INIT();
} }
void void
nat64lsn_uninit_internal(void) nat64lsn_uninit_internal(void)
{ {
+ /* XXX: epoch_task drain */
JQUEUE_LOCK_DESTROY(); JQUEUE_LOCK_DESTROY();
uma_zdestroy(nat64lsn_host_zone); uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pgchunk_zone);
uma_zdestroy(nat64lsn_pg_zone); uma_zdestroy(nat64lsn_pg_zone);
- uma_zdestroy(nat64lsn_pgidx_zone);+ uma_zdestroy(nat64lsn_aliaslink_zone);
+ uma_zdestroy(nat64lsn_state_zone);
+ uma_zdestroy(nat64lsn_job_zone);
} }
void void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg) nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{ {
+ CALLOUT_LOCK(cfg);
callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
nat64lsn_periodic, cfg); nat64lsn_periodic, cfg);
+ CALLOUT_UNLOCK(cfg);
} }
struct nat64lsn_cfg * struct nat64lsn_cfg *
-nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)+nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{ {
struct nat64lsn_cfg *cfg; struct nat64lsn_cfg *cfg;
+ struct nat64lsn_alias *alias;
+ int i, naddr;
- cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
- TAILQ_INIT(&cfg->jhead);+ M_WAITOK | M_ZERO);
+
+ CFG_LOCK_INIT(cfg);
+ CALLOUT_LOCK_INIT(cfg);
+ STAILQ_INIT(&cfg->jhead);
cfg->vp = curvnet; cfg->vp = curvnet;
- cfg->ch = ch;
COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK); COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
- cfg->ihsize = NAT64LSN_HSIZE;+ cfg->hash_seed = arc4random();
- cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,+ cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
- M_WAITOK | M_ZERO);+ cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
-+ cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
- cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,+ for (i = 0; i < cfg->hosts_hashsize; i++)
- M_WAITOK | M_ZERO);+ CK_SLIST_INIT(&cfg->hosts_hash[i]);
+
+ naddr = 1 << (32 - plen);
+ cfg->prefix4 = prefix;
+ cfg->pmask4 = prefix | (naddr - 1);
+ cfg->plen4 = plen;
+ cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
+ M_NAT64LSN, M_WAITOK | M_ZERO);
+ for (i = 0; i < naddr; i++) {
+ alias = &cfg->aliases[i];
+ alias->addr = prefix + i; /* host byte order */
+ CK_SLIST_INIT(&alias->hosts);
+ ALIAS_LOCK_INIT(alias);
+ }
- callout_init(&cfg->periodic, CALLOUT_MPSAFE);+ callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
callout_init(&cfg->jcallout, CALLOUT_MPSAFE); callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
return (cfg); return (cfg);
} }
-/*+static void
- * Destroy all hosts callback.+nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
- * Called on module unload when all activity already finished, so
- * can work without any locks.
- */
-static NAT64NOINLINE int
-nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
{ {
- struct nat64lsn_portgroup *pg;
int i; int i;
- for (i = nh->pg_used; i > 0; i--) {+ if (pg->chunks_count == 1) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i);+ uma_zfree(nat64lsn_state_zone, pg->states);
- if (pg == NULL)+ } else {
- continue;+ for (i = 0; i < pg->chunks_count; i++)
- cfg->pg[pg->idx] = NULL;+ uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
- destroy_portgroup(pg);+ free(pg->states_chunk, M_NAT64LSN);
- nh->pg_used--;+ free(pg->freemask_chunk, M_NAT64LSN);
} }
- destroy_host6(nh);+ uma_zfree(nat64lsn_pg_zone, pg);
- cfg->ihcount--;+}
- return (0);+
+static void
+nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias)
+{
+ struct nat64lsn_pg *pg;
+ int i;
+
+ while (!CK_SLIST_EMPTY(&alias->portgroups)) {
+ pg = CK_SLIST_FIRST(&alias->portgroups);
+ CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
+ nat64lsn_destroy_pg(pg);
+ }
+ for (i = 0; i < 32; i++) {
+ if (ISSET32(alias->tcp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
+ if (ISSET32(alias->udp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
+ if (ISSET32(alias->icmp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
+ }
+ ALIAS_LOCK_DESTROY(alias);
+}
+
+static void
+nat64lsn_destroy_host(struct nat64lsn_host *host)
+{
+ struct nat64lsn_aliaslink *link;
+
+ while (!CK_SLIST_EMPTY(&host->aliases)) {
+ link = CK_SLIST_FIRST(&host->aliases);
+ CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_REMOVE(&link->alias->hosts, link,
+ nat64lsn_aliaslink, alias_entries);
+ link->alias->hosts_count--;
+ ALIAS_UNLOCK(link->alias);
+
+ uma_zfree(nat64lsn_aliaslink_zone, link);
+ }
+ HOST_LOCK_DESTROY(host);
+ free(host->states_hash, M_NAT64LSN);
+ uma_zfree(nat64lsn_host_zone, host);
} }
void void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{ {
- struct nat64lsn_host *nh, *tmp;+ struct nat64lsn_host *host;
+ int i;
- callout_drain(&cfg->jcallout);+ CALLOUT_LOCK(cfg);
callout_drain(&cfg->periodic); callout_drain(&cfg->periodic);
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);+ CALLOUT_UNLOCK(cfg);
- DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);+ callout_drain(&cfg->jcallout);
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
+ host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
+ CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
+ nat64lsn_destroy_host(host);
+ }
+ }
+
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
+ nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
+ CALLOUT_LOCK_DESTROY(cfg);
+ CFG_LOCK_DESTROY(cfg);
COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS); COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
-	free(cfg->ih, M_IPFW);
-	free(cfg->pg, M_IPFW);
-	free(cfg, M_IPFW);
+	free(cfg->hosts_hash, M_NAT64LSN);
+	free(cfg->aliases, M_NAT64LSN);
+	free(cfg, M_NAT64LSN);
 }
sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -35,75 +35,149 @@
#include "ip_fw_nat64.h" #include "ip_fw_nat64.h"
#include "nat64_translate.h" #include "nat64_translate.h"
-#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
-#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
-
#define NAT64_MIN_PORT 1024 #define NAT64_MIN_PORT 1024
-#define NAT64_MIN_CHUNK	(NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
-struct st_ptr {
-	uint8_t	idx;	/* index in nh->pg_ptr array.
-			 * NOTE: it starts from 1.
-			 */
-	uint8_t	off;
+struct nat64lsn_host;
+struct nat64lsn_alias;
+struct nat64lsn_state {
+	/* IPv6 host entry keeps hash table to speedup state lookup */
+	CK_SLIST_ENTRY(nat64lsn_state)	entries;
+	struct nat64lsn_host	*host;
+
+ struct in6_addr ip6_dst; /* Destination IPv6 address */
+
+ in_addr_t ip_src; /* Alias IPv4 address */
+ in_addr_t ip_dst; /* Destination IPv4 address */
+ uint16_t dport; /* Destination port */
+ uint16_t sport; /* Source port */
+
+ uint32_t hval;
+ uint32_t flags; /* Internal flags */
+ uint16_t aport;
+ uint16_t timestamp; /* last used */
+ uint8_t proto;
+ uint8_t _spare[7];
 };
-#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
-#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
-#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
- NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
-struct nat64lsn_portgroup;+struct nat64lsn_states_chunk {
-/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */+ struct nat64lsn_state state[64];
-struct nat64lsn_host {+};
- struct rwlock h_lock; /* Host states lock */+
-+#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit)))
- struct in6_addr addr;+#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit)))
- struct nat64lsn_host *next;+struct nat64lsn_pg {
- uint16_t timestamp; /* Last altered */+ CK_SLIST_ENTRY(nat64lsn_pg) entries;
- uint16_t hsize; /* ports hash size */+
- uint16_t pg_used; /* Number of portgroups used */+ uint16_t base_port;
-#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before+ uint16_t timestamp;
- * requesting of new chunk of indexes.+ uint8_t proto;
- */+ uint8_t chunks_count;
- uint16_t pg_allocated; /* Number of portgroups indexes+ uint8_t spare[2];
- * allocated.+
- */+ union {
-#define NAT64LSN_HSIZE 64+ uint64_t freemask64;
- struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */+ uint32_t freemask32[2];
- /*+ uint64_t *freemask64_chunk;
- * PG indexes are stored in chunks with 32 elements.+ uint32_t *freemask32_chunk;
- * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.+ void *freemask_chunk;
- */+ };
-#define NAT64LSN_PGIDX_CHUNK 32+ union {
-#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \+ struct nat64lsn_states_chunk *states;
- NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)+ struct nat64lsn_states_chunk **states_chunk;
- struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */+ };
+};
+
+#define CHUNK_BY_FADDR(p, a) ((a) & ((p)->chunks_count - 1))
+
+#ifdef __LP64__
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask64 : \
+ &(p)->freemask64_chunk[CHUNK_BY_FADDR(p, v)])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*FREEMASK_CHUNK((pg), (faddr)))
+#else
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask32[0] : \
+ &(p)->freemask32_chunk[CHUNK_BY_FADDR(p, v) * 2])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*(uint64_t *)FREEMASK_CHUNK((pg), (faddr)))
+#endif /* !__LP64__ */
+
+struct nat64lsn_pgchunk {
+ struct nat64lsn_pg *pgptr[32];
}; };
-#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)+struct nat64lsn_aliaslink {
-#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)+ CK_SLIST_ENTRY(nat64lsn_aliaslink) alias_entries;
+ CK_SLIST_ENTRY(nat64lsn_aliaslink) host_entries;
+ struct nat64lsn_alias *alias;
+};
-#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)+CK_SLIST_HEAD(nat64lsn_aliaslink_slist, nat64lsn_aliaslink);
-#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)+CK_SLIST_HEAD(nat64lsn_states_slist, nat64lsn_state);
-#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)+CK_SLIST_HEAD(nat64lsn_hosts_slist, nat64lsn_host);
-#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)+CK_SLIST_HEAD(nat64lsn_pg_slist, nat64lsn_pg);
-#define NAT64_LOCK(h) NAT64_WLOCK(h)+
-#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)+struct nat64lsn_alias {
-#define NAT64_LOCK_INIT(h) do { \+ struct nat64lsn_aliaslink_slist hosts;
- rw_init(&(h)->h_lock, "NAT64 host lock"); \+ struct nat64lsn_pg_slist portgroups;
- } while (0)+
+ struct mtx lock;
+ in_addr_t addr; /* host byte order */
+ uint32_t hosts_count;
+ uint32_t portgroups_count;
+ uint32_t tcp_chunkmask;
+ uint32_t udp_chunkmask;
+ uint32_t icmp_chunkmask;
+
+ uint32_t tcp_pgidx;
+ uint32_t udp_pgidx;
+ uint32_t icmp_pgidx;
+ uint16_t timestamp;
+ uint16_t spare;
+
+ uint32_t tcp_pgmask[32];
+ uint32_t udp_pgmask[32];
+ uint32_t icmp_pgmask[32];
+ struct nat64lsn_pgchunk *tcp[32];
+ struct nat64lsn_pgchunk *udp[32];
+ struct nat64lsn_pgchunk *icmp[32];
+
+ /* pointer to PG that can be used for faster state allocation */
+ struct nat64lsn_pg *tcp_pg;
+ struct nat64lsn_pg *udp_pg;
+ struct nat64lsn_pg *icmp_pg;
+};
+#define ALIAS_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF)
+#define ALIAS_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define ALIAS_LOCK(p) mtx_lock(&(p)->lock)
+#define ALIAS_UNLOCK(p) mtx_unlock(&(p)->lock)
-#define NAT64_LOCK_DESTROY(h) do { \+#define NAT64LSN_HSIZE 256
- rw_destroy(&(h)->h_lock); \+#define NAT64LSN_MAX_HSIZE 4096
- } while (0)+#define NAT64LSN_HOSTS_HSIZE 1024
-/* Internal proto index */+struct nat64lsn_host {
-#define NAT_PROTO_TCP 1+ struct in6_addr addr;
-#define NAT_PROTO_UDP 2+ struct nat64lsn_aliaslink_slist aliases;
-#define NAT_PROTO_ICMP 3+ struct nat64lsn_states_slist *states_hash;
+ CK_SLIST_ENTRY(nat64lsn_host) entries;
+ uint32_t states_count;
+ uint32_t hval;
+ uint32_t flags;
+#define NAT64LSN_DEADHOST 1
+#define NAT64LSN_GROWHASH 2
+ uint16_t states_hashsize;
+ uint16_t timestamp;
+ struct mtx lock;
+};
-#define NAT_MAX_PROTO 4+#define HOST_LOCK_INIT(p) \
-extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];+ mtx_init(&(p)->lock, "host_lock", NULL, MTX_DEF|MTX_NEW)
+#define HOST_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define HOST_LOCK(p) mtx_lock(&(p)->lock)
+#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock)
VNET_DECLARE(uint16_t, nat64lsn_eid); VNET_DECLARE(uint16_t, nat64lsn_eid);
#define V_nat64lsn_eid VNET(nat64lsn_eid) #define V_nat64lsn_eid VNET(nat64lsn_eid)
@@ -112,124 +186,65 @@ VNET_DECLARE(uint16_t, nat64lsn_eid);
/* Timestamp macro */ /* Timestamp macro */
#define _CT ((int)time_uptime % 65536) #define _CT ((int)time_uptime % 65536)
#define SET_AGE(x) (x) = _CT #define SET_AGE(x) (x) = _CT
-#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \+#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x): (int)65536 + _CT - (x))
- (int)65536 + _CT - (x))
-#ifdef __LP64__+STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
-/* ffsl() is capable of checking 64-bit ints */
-#define _FFS64
-#endif
-
-/* 16 bytes */
-struct nat64lsn_state {
- union {
- struct {
- in_addr_t faddr; /* Remote IPv4 address */
- uint16_t fport; /* Remote IPv4 port */
- uint16_t lport; /* Local IPv6 port */
- }s;
- uint64_t hkey;
- } u;
- uint8_t nat_proto;
- uint8_t flags;
- uint16_t timestamp;
- struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
- struct st_ptr next; /* Next entry index */
-};
-
-/*
- * 1024+32 bytes per 64 states, used to store state
- * AND for outside-in state lookup
- */
-struct nat64lsn_portgroup {
- struct nat64lsn_host *host; /* IPv6 source host info */
- in_addr_t aaddr; /* Alias addr, network format */
- uint16_t aport; /* Base port */
- uint16_t timestamp;
- uint8_t nat_proto;
- uint8_t spare[3];
- uint32_t idx;
-#ifdef _FFS64
- uint64_t freemask; /* Mask of free entries */
-#else
- uint32_t freemask[2]; /* Mask of free entries */
-#endif
- struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
-};
-#ifdef _FFS64
-#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
-#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
-#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
-#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
-#else
-#define PG_MARK_BUSY_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
-#define PG_MARK_FREE_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
-#define PG_IS_FREE_IDX(_pg, _idx) \
- ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
-#define PG_IS_EMPTY(_pg) \
- ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
-
-static inline int
-_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
-{
- int i;
-
- if ((i = ffsl(pg->freemask[0])) != 0)
- return (i);
- if ((i = ffsl(pg->freemask[1])) != 0)
- return (i + 32);
- return (0);
-}
-
-#endif
-
-TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
struct nat64lsn_cfg { struct nat64lsn_cfg {
struct named_object no; struct named_object no;
- struct nat64lsn_portgroup **pg; /* XXX: array of pointers */+
- struct nat64lsn_host **ih; /* Host hash */+ struct nat64lsn_hosts_slist *hosts_hash;
+ struct nat64lsn_alias *aliases; /* array of aliases */
+
+ struct mtx lock;
+ uint32_t hosts_hashsize;
+ uint32_t hash_seed;
+
uint32_t prefix4; /* IPv4 prefix */ uint32_t prefix4; /* IPv4 prefix */
uint32_t pmask4; /* IPv4 prefix mask */ uint32_t pmask4; /* IPv4 prefix mask */
- uint32_t ihsize; /* IPv6 host hash size */
uint8_t plen4; uint8_t plen4;
- uint8_t nomatch_verdict;/* What to return to ipfw on no-match */+ uint8_t nomatch_verdict;/* Return value on no-match */
- uint32_t ihcount; /* Number of items in host hash */+ uint32_t hosts_count; /* Number of items in host hash */
- int max_chunks; /* Max chunks per client */+ uint32_t states_chunks; /* Number of states chunks per PG */
- int agg_prefix_len; /* Prefix length to count */
- int agg_prefix_max; /* Max hosts per agg prefix */
uint32_t jmaxlen; /* Max jobqueue length */ uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_chunk; /* Min port group # to use */+ uint16_t host_delete_delay; /* Stale host delete delay */
- uint16_t max_chunk; /* Max port group # to use */+ uint16_t pgchunk_delete_delay;
- uint16_t nh_delete_delay; /* Stale host delete delay */
uint16_t pg_delete_delay; /* Stale portgroup del delay */ uint16_t pg_delete_delay; /* Stale portgroup del delay */
uint16_t st_syn_ttl; /* TCP syn expire */ uint16_t st_syn_ttl; /* TCP syn expire */
uint16_t st_close_ttl; /* TCP fin expire */ uint16_t st_close_ttl; /* TCP fin expire */
uint16_t st_estab_ttl; /* TCP established expire */ uint16_t st_estab_ttl; /* TCP established expire */
uint16_t st_udp_ttl; /* UDP expire */ uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */ uint16_t st_icmp_ttl; /* ICMP expire */
- uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */+
struct nat64_config base; struct nat64_config base;
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE) #define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
+#define NAT64LSN_ANYPREFIX 0x00000100
+ struct mtx periodic_lock;
struct callout periodic; struct callout periodic;
struct callout jcallout; struct callout jcallout;
- struct ip_fw_chain *ch;
struct vnet *vp; struct vnet *vp;
struct nat64lsn_job_head jhead; struct nat64lsn_job_head jhead;
int jlen; int jlen;
char name[64]; /* Nat instance name */ char name[64]; /* Nat instance name */
}; };
+/* CFG_LOCK protects cfg->hosts_hash from modification */
+#define CFG_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF)
+#define CFG_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define CFG_LOCK(p) mtx_lock(&(p)->lock)
+#define CFG_UNLOCK(p) mtx_unlock(&(p)->lock)
+
+#define CALLOUT_LOCK_INIT(p) \
+ mtx_init(&(p)->periodic_lock, "periodic_lock", NULL, MTX_DEF)
+#define CALLOUT_LOCK_DESTROY(p) mtx_destroy(&(p)->periodic_lock)
+#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock)
+#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock)
+
struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch, struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
- size_t numaddr);+ in_addr_t prefix, int plen);
void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg); void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_start_instance(struct nat64lsn_cfg *cfg); void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_init_internal(void); void nat64lsn_init_internal(void);
@@ -237,114 +252,4 @@ void nat64lsn_uninit_internal(void);
int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done); ipfw_insn *cmd, int *done);
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off);
-/*
- * Portgroup layout
- * addr x nat_proto x port_off
- *
- */
-
-#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS)
-#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
-
-#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4))
-#define __GET_PORTGROUP_IDX(_proto, _port) \
- ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \
- ((_port) >> NAT64_CHUNK_SIZE_BITS))
-
-#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \
- GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \
- __GET_PORTGROUP_IDX(_proto, _port)
-#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \
- ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
-
-#define PORTGROUP_CHUNK(_nh, _idx) \
- ((_nh)->pg_ptr[(_idx)])
-#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \
- (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \
- [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
-
-
-/* Chained hash table */
-#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- for ( ; _x != NULL; _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x == NULL) \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
- _PX##unlock(_ph, _buck);
-
-#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
- unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _PX##next(_i) = _PX##first(_ph, _buck); \
- _PX##first(_ph, _buck) = _i; \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- _tmp = NULL; \
- for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x != NULL) { \
- if (_tmp == NULL) \
- _PX##first(_ph, _buck) = _PX##next(_x); \
- else \
- _PX##next(_tmp) = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _PX##lock(_ph, _i); \
- _x = _PX##first(_ph, _i); \
- _tmp = NULL; \
- for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_cb(_x, _arg) == 0) \
- continue; \
- if (_tmp == NULL) \
- _PX##first(_ph, _i) = _PX##next(_x); \
- else \
- _tmp = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _i); \
- } \
-} while(0)
-
-#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \
- unsigned int _buck; \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _x = _PX##first(_ph, _i); \
- _y = _x; \
- while (_y != NULL) { \
- _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
- _y = _PX##next(_x); \
- _PX##next(_x) = _PX##first(_nph, _buck); \
- _PX##first(_nph, _buck) = _x; \
- } \
- } \
-} while(0)
-
 #endif /* _IP_FW_NAT64LSN_H_ */
-
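
In the new layout each nat64lsn_pg covers 64 ports starting at base_port, and a 64-bit freemask (see the FREEMASK_* macros above) marks which state slots are free, so the translated source port is simply base_port plus the slot index. A stand-alone sketch of that mapping follows; freemask_first_free() and the sample mask are assumed names and values, not the kernel API:

/*
 * Illustrative user-space sketch: bit N set in the freemask means state
 * slot N is free, and the NAT source port for that slot is base_port + N.
 */
#include <stdint.h>
#include <stdio.h>

#define NAT64_MIN_PORT	1024	/* same constant as nat64lsn.h */

/* Return the 0-based index of the lowest set bit, or -1 if none are free. */
static int
freemask_first_free(uint64_t freemask)
{
	int i;

	for (i = 0; i < 64; i++)
		if (freemask & ((uint64_t)1 << i))
			return (i);
	return (-1);
}

int
main(void)
{
	/* PG index 3 of an alias: 64 ports starting at 1024 + 64 * 3. */
	uint16_t base_port = NAT64_MIN_PORT + 64 * 3;
	uint64_t freemask = ~0ULL & ~0x7ULL;	/* slots 0-2 already taken */
	int slot = freemask_first_free(freemask);

	if (slot >= 0)
		printf("allocated state slot %d -> NAT port %u\n",
		    slot, (unsigned)(base_port + slot));
	return (0);
}
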
sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
#include <sys/counter.h> #include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h> #include <sys/errno.h>
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/lock.h> #include <sys/lock.h>
@@ -43,10 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h> #include <sys/rwlock.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/sockopt.h> #include <sys/sockopt.h>
-#include <sys/queue.h>
#include <net/if.h> #include <net/if.h>
-#include <net/pfil.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netinet/ip.h> #include <netinet/ip.h>
@@ -75,12 +75,6 @@ static void
nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
{ {
- if (uc->max_ports == 0)
- uc->max_ports = NAT64LSN_MAX_PORTS;
- else
- uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
- if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
- uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
if (uc->jmaxlen == 0) if (uc->jmaxlen == 0)
uc->jmaxlen = NAT64LSN_JMAXLEN; uc->jmaxlen = NAT64LSN_JMAXLEN;
if (uc->jmaxlen > 65536) if (uc->jmaxlen > 65536)
@@ -99,6 +93,13 @@ nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
uc->st_udp_ttl = NAT64LSN_UDP_AGE; uc->st_udp_ttl = NAT64LSN_UDP_AGE;
if (uc->st_icmp_ttl == 0) if (uc->st_icmp_ttl == 0)
uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+
+ if (uc->states_chunks == 0)
+ uc->states_chunks = 1;
+ else if (uc->states_chunks >= 128)
+ uc->states_chunks = 128;
+ else if (!powerof2(uc->states_chunks))
+ uc->states_chunks = 1 << fls(uc->states_chunks);
} }
/* /*
@@ -127,12 +128,20 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
if (ipfw_check_object_name_generic(uc->name) != 0) if (ipfw_check_object_name_generic(uc->name) != 0)
return (EINVAL); return (EINVAL);
- if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)+ if (uc->set >= IPFW_MAX_SETS)
return (EINVAL); return (EINVAL);
if (uc->plen4 > 32) if (uc->plen4 > 32)
return (EINVAL); return (EINVAL);
- if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0)+
+ /*
+ * Unspecified address has special meaning. But it must
+ * have valid prefix length. This length will be used to
+ * correctly extract and embed IPv4 address into IPv6.
+ */
+ if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
+ nat64_check_prefixlen(uc->plen6) != 0)
return (EINVAL); return (EINVAL);
/* XXX: Check prefix4 to be global */ /* XXX: Check prefix4 to be global */
@@ -140,14 +149,6 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
mask4 = ~((1 << (32 - uc->plen4)) - 1); mask4 = ~((1 << (32 - uc->plen4)) - 1);
if ((addr4 & mask4) != addr4) if ((addr4 & mask4) != addr4)
return (EINVAL); return (EINVAL);
- if (uc->min_port == 0)
- uc->min_port = NAT64_MIN_PORT;
- if (uc->max_port == 0)
- uc->max_port = 65535;
- if (uc->min_port > uc->max_port)
- return (EINVAL);
- uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
- uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
nat64lsn_default_config(uc); nat64lsn_default_config(uc);
@@ -159,7 +160,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
} }
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4));+ cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
strlcpy(cfg->name, uc->name, sizeof(cfg->name)); strlcpy(cfg->name, uc->name, sizeof(cfg->name));
cfg->no.name = cfg->name; cfg->no.name = cfg->name;
cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
@@ -170,20 +171,12 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX; cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix)) if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
cfg->base.flags |= NAT64_WKPFX; cfg->base.flags |= NAT64_WKPFX;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
+ cfg->base.flags |= NAT64LSN_ANYPREFIX;
- cfg->prefix4 = addr4;+ cfg->states_chunks = uc->states_chunks;
- cfg->pmask4 = addr4 | ~mask4;
- cfg->plen4 = uc->plen4;
-
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
- cfg->agg_prefix_len = uc->agg_prefix_len;
- cfg->agg_prefix_max = uc->agg_prefix_max;
-
- cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
- cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
-
cfg->jmaxlen = uc->jmaxlen; cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl; cfg->st_close_ttl = uc->st_close_ttl;
@@ -249,7 +242,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch);
- return (ESRCH);+ return (ENOENT);
} }
if (cfg->no.refcnt > 0) { if (cfg->no.refcnt > 0) {
@@ -272,6 +265,8 @@ static void
export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
struct ipfw_nat64lsn_stats *stats) struct ipfw_nat64lsn_stats *stats)
{ {
+ struct nat64lsn_alias *alias;
+ int i, j;
__COPY_STAT_FIELD(cfg, stats, opcnt64); __COPY_STAT_FIELD(cfg, stats, opcnt64);
__COPY_STAT_FIELD(cfg, stats, opcnt46); __COPY_STAT_FIELD(cfg, stats, opcnt46);
@@ -299,10 +294,16 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
__COPY_STAT_FIELD(cfg, stats, spgcreated); __COPY_STAT_FIELD(cfg, stats, spgcreated);
__COPY_STAT_FIELD(cfg, stats, spgdeleted); __COPY_STAT_FIELD(cfg, stats, spgdeleted);
- stats->hostcount = cfg->ihcount;+ stats->hostcount = cfg->hosts_count;
- stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
- stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];+ alias = &cfg->aliases[i];
- stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];+ for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
+ stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
+ stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
+ stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
+ }
} }
#undef __COPY_STAT_FIELD #undef __COPY_STAT_FIELD
@@ -312,12 +313,9 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
{ {
uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK; uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
- uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;+ uc->states_chunks = cfg->states_chunks;
- uc->agg_prefix_len = cfg->agg_prefix_len;
- uc->agg_prefix_max = cfg->agg_prefix_max;
-
uc->jmaxlen = cfg->jmaxlen; uc->jmaxlen = cfg->jmaxlen;
- uc->nh_delete_delay = cfg->nh_delete_delay;+ uc->nh_delete_delay = cfg->host_delete_delay;
uc->pg_delete_delay = cfg->pg_delete_delay; uc->pg_delete_delay = cfg->pg_delete_delay;
uc->st_syn_ttl = cfg->st_syn_ttl; uc->st_syn_ttl = cfg->st_syn_ttl;
uc->st_close_ttl = cfg->st_close_ttl; uc->st_close_ttl = cfg->st_close_ttl;
@@ -425,7 +423,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- return (EEXIST);+ return (ENOENT);
} }
nat64lsn_export_config(ch, cfg, uc); nat64lsn_export_config(ch, cfg, uc);
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
@@ -438,18 +436,18 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch);
- return (EEXIST);+ return (ENOENT);
} }
/* /*
* For now allow to change only following values: * For now allow to change only following values:
* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
- * tcp_est_age, udp_age, icmp_age, flags, max_ports.+ * tcp_est_age, udp_age, icmp_age, flags, states_chunks.
*/ */
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;+ cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen; cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl; cfg->st_close_ttl = uc->st_close_ttl;
@@ -496,7 +494,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- return (ESRCH);+ return (ENOENT);
} }
export_stats(ch, cfg, &stats); export_stats(ch, cfg, &stats);
@@ -538,163 +536,176 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch);
- return (ESRCH);+ return (ENOENT);
} }
COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS); COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
IPFW_UH_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch);
return (0); return (0);
} }
+#ifdef __LP64__
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))
+#else
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \
+ ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif
/* /*
* Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
* ipfw_nat64lsn_state x count, ... ] ] * ipfw_nat64lsn_state x count, ... ] ]
*/ */
static int static int
-export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,+nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
- ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)+ struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{ {
- ipfw_nat64lsn_state *ste;+ ipfw_nat64lsn_state_v1 *s;
- struct nat64lsn_state *st;+ struct nat64lsn_state *state;
- int i, count;+ uint64_t freemask;
+ uint32_t i, count;
- NAT64_LOCK(pg->host);+ /* validate user input */
- count = 0;+ if (idx->chunk > pg->chunks_count - 1)
- for (i = 0; i < 64; i++) {+ return (EINVAL);
- if (PG_IS_BUSY_IDX(pg, i))
- count++;
- }
- DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
- if (count == 0) {+ FREEMASK_COPY(pg, idx->chunk, freemask);
- stg->count = 0;+ count = 64 - bitcount64(freemask);
- NAT64_UNLOCK(pg->host);+ if (count == 0)
- return (0);+ return (0); /* Try next PG/chunk */
- }+
- ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,+ DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
- count * sizeof(ipfw_nat64lsn_state));+ (uintmax_t)idx->index, count);
- if (ste == NULL) {+
- NAT64_UNLOCK(pg->host);+ s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
- return (1);+ count * sizeof(ipfw_nat64lsn_state_v1));
- }+ if (s == NULL)
+ return (ENOMEM);
- stg->alias4.s_addr = pg->aaddr;
- stg->proto = nat64lsn_rproto_map[pg->nat_proto];
- stg->flags = 0;
- stg->host6 = pg->host->addr;
- stg->count = count;
for (i = 0; i < 64; i++) { for (i = 0; i < 64; i++) {
- if (PG_IS_FREE_IDX(pg, i))+ if (ISSET64(freemask, i))
continue; continue;
- st = &pg->states[i];+ state = pg->chunks_count == 1 ? &pg->states->state[i] :
- ste->daddr.s_addr = st->u.s.faddr;+ &pg->states_chunk[idx->chunk]->state[i];
- ste->dport = st->u.s.fport;+
- ste->aport = pg->aport + i;+ s->host6 = state->host->addr;
- ste->sport = st->u.s.lport;+ s->daddr.s_addr = htonl(state->ip_dst);
- ste->flags = st->flags; /* XXX filter flags */+ s->dport = state->dport;
- ste->idle = GET_AGE(st->timestamp);+ s->sport = state->sport;
- ste++;+ s->aport = state->aport;
+ s->flags = (uint8_t)(state->flags & 7);
+ s->proto = state->proto;
+ s->idle = GET_AGE(state->timestamp);
+ s++;
} }
- NAT64_UNLOCK(pg->host);+ *ret_count = count;
-
return (0); return (0);
} }
+#define LAST_IDX 0xFF
static int static int
-get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,+nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
- uint16_t *port)+ union nat64lsn_pgidx *idx)
{ {
- if (*port < 65536 - NAT64_CHUNK_SIZE) {+ /* First iterate over chunks */
- *port += NAT64_CHUNK_SIZE;+ if (pg != NULL) {
- return (0);+ if (idx->chunk < pg->chunks_count - 1) {
+ idx->chunk++;
+ return (0);
+ }
} }
- *port = 0;+ idx->chunk = 0;
-+ /* Then over PGs */
- if (*nat_proto < NAT_MAX_PROTO - 1) {+ if (idx->port < UINT16_MAX - 64) {
- *nat_proto += 1;+ idx->port += 64;
return (0); return (0);
} }
- *nat_proto = 1;+ idx->port = NAT64_MIN_PORT;
-+ /* Then over supported protocols */
- if (*addr < cfg->pmask4) {+ switch (idx->proto) {
- *addr += 1;+ case IPPROTO_ICMP:
+ idx->proto = IPPROTO_TCP;
return (0); return (0);
+ case IPPROTO_TCP:
+ idx->proto = IPPROTO_UDP;
+ return (0);
+ default:
+ idx->proto = IPPROTO_ICMP;
} }
-+ /* And then over IPv4 alias addresses */
- /* End of space. */+ if (idx->addr < cfg->pmask4) {
- return (1);+ idx->addr++;
+ return (1); /* New states group is needed */
+ }
+ idx->index = LAST_IDX;
+ return (-1); /* No more states */
} }
-#define PACK_IDX(addr, proto, port) \+static struct nat64lsn_pg*
- ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)+nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
-#define UNPACK_IDX(idx, addr, proto, port) \
- (addr) = (uint32_t)((idx) >> 32); \
- (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
- (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
-
-static struct nat64lsn_portgroup *
-get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
{ {
- struct nat64lsn_portgroup *pg;+ struct nat64lsn_alias *alias;
- uint64_t pre_pack, post_pack;+ int pg_idx;
-+
- pg = NULL;+ alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
- pre_pack = PACK_IDX(*addr, *nat_proto, *port);+ MPASS(alias->addr == idx->addr);
- for (;;) {+
- if (get_next_idx(cfg, addr, nat_proto, port) != 0) {+ pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
- /* End of states */+ switch (idx->proto) {
- return (pg);+ case IPPROTO_ICMP:
- }+ if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
-+ return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);+ break;
- if (pg != NULL)+ case IPPROTO_TCP:
- break;+ if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
} }
-+ return (NULL);
- post_pack = PACK_IDX(*addr, *nat_proto, *port);
- if (pre_pack == post_pack)
- DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
- *addr, *nat_proto, *port);
- return (pg);
} }
-static NAT64NOINLINE struct nat64lsn_portgroup *+/*
-get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,+ * Lists nat64lsn states.
- uint16_t *port)+ * Data layout (v0):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{ {
- struct nat64lsn_portgroup *pg;
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);+ /* TODO: implement states listing for old ipfw(8) binaries */
- if (pg == NULL)+ return (EOPNOTSUPP);
- pg = get_next_pg(cfg, addr, nat_proto, port);
-
- return (pg);
} }
/* /*
* Lists nat64lsn states. * Lists nat64lsn states.
- * Data layout (v0)(current):+ * Data layout (v1)(current):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [ * Reply: [ ipfw_obj_header ipfw_obj_data [
- * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]+ * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
* *
* Returns 0 on success * Returns 0 on success
*/ */
static int static int
-nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,+nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd) struct sockopt_data *sd)
{ {
ipfw_obj_header *oh; ipfw_obj_header *oh;
ipfw_obj_data *od; ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;+ ipfw_nat64lsn_stg_v1 *stg;
struct nat64lsn_cfg *cfg; struct nat64lsn_cfg *cfg;
- struct nat64lsn_portgroup *pg, *pg_next;+ struct nat64lsn_pg *pg;
- uint64_t next_idx;+ union nat64lsn_pgidx idx;
size_t sz; size_t sz;
- uint32_t addr, states;+ uint32_t count, total;
- uint16_t port;+ int ret;
- uint8_t nat_proto;
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
sizeof(uint64_t); sizeof(uint64_t);
@@ -708,78 +719,96 @@ nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
od->head.length != sz - sizeof(ipfw_obj_header)) od->head.length != sz - sizeof(ipfw_obj_header))
return (EINVAL); return (EINVAL);
- next_idx = *(uint64_t *)(od + 1);+ idx.index = *(uint64_t *)(od + 1);
- /* Translate index to the request position to start from */+ if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
- UNPACK_IDX(next_idx, addr, nat_proto, port);+ idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
- if (nat_proto >= NAT_MAX_PROTO)
return (EINVAL); return (EINVAL);
- if (nat_proto == 0 && addr != 0)+ if (idx.index == LAST_IDX)
return (EINVAL); return (EINVAL);
IPFW_UH_RLOCK(ch); IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) { if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- return (ESRCH);+ return (ENOENT);
} }
- /* Fill in starting point */+ if (idx.index == 0) { /* Fill in starting point */
- if (addr == 0) {+ idx.addr = cfg->prefix4;
- addr = cfg->prefix4;+ idx.proto = IPPROTO_ICMP;
- nat_proto = 1;+ idx.port = NAT64_MIN_PORT;
- port = 0;
} }
- if (addr < cfg->prefix4 || addr > cfg->pmask4) {+ if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
+ idx.port < NAT64_MIN_PORT) {
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
- (uintmax_t)next_idx, addr, cfg->pmask4);
return (EINVAL); return (EINVAL);
} }
-
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
- sizeof(ipfw_nat64lsn_stg);+ sizeof(ipfw_nat64lsn_stg_v1);
- if (sd->valsize < sz)+ if (sd->valsize < sz) {
+ IPFW_UH_RUNLOCK(ch);
return (ENOMEM); return (ENOMEM);
+ }
oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
od = (ipfw_obj_data *)(oh + 1); od = (ipfw_obj_data *)(oh + 1);
od->head.type = IPFW_TLV_OBJDATA; od->head.type = IPFW_TLV_OBJDATA;
od->head.length = sz - sizeof(ipfw_obj_header); od->head.length = sz - sizeof(ipfw_obj_header);
- stg = (ipfw_nat64lsn_stg *)(od + 1);+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
-+ stg->count = total = 0;
- pg = get_first_pg(cfg, &addr, &nat_proto, &port);+ stg->next.index = idx.index;
- if (pg == NULL) {+ /*
- /* No states */+ * Acquire CALLOUT_LOCK to avoid races with expiration code.
- stg->next_idx = 0xFF;+ * Thus states, hosts and PGs will not expire while we hold it.
- stg->count = 0;+ */
- IPFW_UH_RUNLOCK(ch);+ CALLOUT_LOCK(cfg);
- return (0);+ ret = 0;
- }+ do {
- states = 0;+ pg = nat64lsn_get_pg_byidx(cfg, &idx);
- pg_next = NULL;+ if (pg != NULL) {
- while (pg != NULL) {+ count = 0;
- pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);+ ret = nat64lsn_export_states_v1(cfg, &idx, pg,
- if (pg_next == NULL)+ sd, &count);
- stg->next_idx = 0xFF;+ if (ret != 0)
- else+ break;
- stg->next_idx = PACK_IDX(addr, nat_proto, port);+ if (count > 0) {
-+ stg->count += count;
- if (export_pg_states(cfg, pg, stg, sd) != 0) {+ total += count;
- IPFW_UH_RUNLOCK(ch);+ /* Update total size of reply */
- return (states == 0 ? ENOMEM: 0);+ od->head.length +=
+ count * sizeof(ipfw_nat64lsn_state_v1);
+ sz += count * sizeof(ipfw_nat64lsn_state_v1);
+ }
+ stg->alias4.s_addr = htonl(idx.addr);
} }
- states += stg->count;+ /* Determine new index */
- od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);+ switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
- sz += stg->count * sizeof(ipfw_nat64lsn_state);+ case -1:
- if (pg_next != NULL) {+ ret = ENOENT; /* End of search */
- sz += sizeof(ipfw_nat64lsn_stg);+ break;
- if (sd->valsize < sz)+ case 1: /*
+ * Next alias address, new group may be needed.
+ * If states count is zero, use this group.
+ */
+ if (stg->count == 0)
+ continue;
+ /* Otherwise try to create new group */
+ sz += sizeof(ipfw_nat64lsn_stg_v1);
+ if (sd->valsize < sz) {
+ ret = ENOMEM;
break; break;
- stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,+ }
- sizeof(ipfw_nat64lsn_stg));+ /* Save next index in current group */
+ stg->next.index = idx.index;
+ stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
+ sizeof(ipfw_nat64lsn_stg_v1));
+ od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
+ stg->count = 0;
+ break;
} }
- pg = pg_next;+ stg->next.index = idx.index;
- }+ } while (ret == 0);
+ CALLOUT_UNLOCK(cfg);
IPFW_UH_RUNLOCK(ch); IPFW_UH_RUNLOCK(ch);
- return (0);+ return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
} }
static struct ipfw_sopt_handler scodes[] = { static struct ipfw_sopt_handler scodes[] = {
@@ -789,7 +818,8 @@ static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
- { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 },
+ { IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 },
}; };
static int static int
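For reference, a minimal userland sketch (my own illustration, not code from this commit) of how the new states_chunks knob is normalized in nat64lsn_default_config() above: the value is clamped to the range 1..128 and rounded up to the next power of two. The powerof2() macro and fls() are assumed to behave like the sys/param.h and libc definitions.

	#include <stdio.h>
	#include <strings.h>			/* fls() on FreeBSD */

	#define powerof2(x)	((((x) - 1) & (x)) == 0)

	/* Mirrors the normalization added to nat64lsn_default_config(). */
	static unsigned int
	normalize_states_chunks(unsigned int chunks)
	{
		if (chunks == 0)
			return (1);
		if (chunks >= 128)
			return (128);
		if (!powerof2(chunks))
			return (1U << fls(chunks));	/* round up to 2^n */
		return (chunks);
	}

	int
	main(void)
	{
		/* Expected output: 1 4 64 128 */
		printf("%u %u %u %u\n", normalize_states_chunks(0),
		    normalize_states_chunks(3), normalize_states_chunks(64),
		    normalize_states_chunks(1000));
		return (0);
	}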
sys/sys/param.h
@@ -60,7 +60,7 @@
* in the range 5 to 9. * in the range 5 to 9.
*/ */
#undef __FreeBSD_version #undef __FreeBSD_version
-#define __FreeBSD_version 1300016 /* Master, propagated to newvers */+#define __FreeBSD_version 1300017 /* Master, propagated to newvers */
/* /*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
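Out-of-tree code that needs to follow this interface change can key off the bumped value; a generic, hedged illustration (the comments are placeholders, not taken from the commit):

	#include <sys/param.h>

	#if defined(__FreeBSD_version) && __FreeBSD_version >= 1300017
	/* 13.0-CURRENT at or after this bump: build against the new interfaces. */
	#else
	/* Older 13.0-CURRENT or earlier: fall back to the previous interfaces. */
	#endif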
sys/sys/stat.h
@@ -224,6 +224,10 @@ struct nstat {
#define st_ctime st_ctim.tv_sec #define st_ctime st_ctim.tv_sec
#if __BSD_VISIBLE #if __BSD_VISIBLE
#define st_birthtime st_birthtim.tv_sec #define st_birthtime st_birthtim.tv_sec
+#define st_atimensec st_atim.tv_nsec
+#define st_mtimensec st_mtim.tv_nsec
+#define st_ctimensec st_ctim.tv_nsec
+#define st_birthtimensec st_birthtim.tv_nsec
#endif #endif
/* For compatibility. */ /* For compatibility. */
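A small sanity-check program (illustrative only, not part of the commit) using the newly exposed nanosecond aliases; under __BSD_VISIBLE, st_mtimensec simply expands to st_mtim.tv_nsec:

	#include <sys/stat.h>

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct stat sb;

		if (stat("/etc/rc", &sb) == -1)
			return (1);
		printf("mtime: %jd.%09ld\n", (intmax_t)sb.st_mtime,
		    (long)sb.st_mtimensec);
		return (0);
	}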
sys/sys/sysctl.h
@@ -354,6 +354,25 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
NULL); \ NULL); \
}) })
+/* Oid for a constant '\0' terminated string. */
+#define SYSCTL_CONST_STRING(parent, nbr, name, access, arg, descr) \
+ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \
+ __DECONST(char *, arg), 0, sysctl_handle_string, "A", descr); \
+ CTASSERT(!(access & CTLFLAG_WR)); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING)
+
+#define SYSCTL_ADD_CONST_STRING(ctx, parent, nbr, name, access, arg, descr) \
+({ \
+ char *__arg = __DECONST(char *, arg); \
+ CTASSERT(!(access & CTLFLAG_WR)); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING); \
+ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \
+ __arg, 0, sysctl_handle_string, "A", __DESCR(descr), \
+ NULL); \
+})
+
/* Oid for a bool. If ptr is NULL, val is returned. */ /* Oid for a bool. If ptr is NULL, val is returned. */
#define SYSCTL_NULL_BOOL_PTR ((bool *)NULL) #define SYSCTL_NULL_BOOL_PTR ((bool *)NULL)
#define SYSCTL_BOOL(parent, nbr, name, access, ptr, val, descr) \ #define SYSCTL_BOOL(parent, nbr, name, access, ptr, val, descr) \
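A hedged kernel-side fragment showing the new macro in use; the oid name and string value are invented for illustration:

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/sysctl.h>

	static const char example_banner[] = "hello from a read-only sysctl";

	/*
	 * Declares kern.example_banner as a constant, read-only string.
	 * The CTASSERTs in the macro reject any CTLFLAG_WR access bits.
	 */
	SYSCTL_CONST_STRING(_kern, OID_AUTO, example_banner, CTLFLAG_RD,
	    example_banner, "constant string exported via sysctl");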
sys/ufs/ffs/ffs_softdep.c
@@ -13970,6 +13970,8 @@ softdep_bp_to_mp(bp)
if (LIST_EMPTY(&bp->b_dep)) if (LIST_EMPTY(&bp->b_dep))
return (NULL); return (NULL);
vp = bp->b_vp; vp = bp->b_vp;
+ KASSERT(vp != NULL,
+ ("%s, buffer with dependencies lacks vnode", __func__));
/* /*
* The ump mount point is stable after we get a correct * The ump mount point is stable after we get a correct
@@ -13979,17 +13981,33 @@ softdep_bp_to_mp(bp)
* workitem might be freed while dereferenced. * workitem might be freed while dereferenced.
*/ */
retry: retry:
- if (vp->v_type == VCHR) {+ switch (vp->v_type) {
+ case VCHR:
VI_LOCK(vp); VI_LOCK(vp);
mp = vp->v_type == VCHR ? vp->v_rdev->si_mountpt : NULL; mp = vp->v_type == VCHR ? vp->v_rdev->si_mountpt : NULL;
VI_UNLOCK(vp); VI_UNLOCK(vp);
if (mp == NULL) if (mp == NULL)
goto retry; goto retry;
- } else if (vp->v_type == VREG || vp->v_type == VDIR ||+ break;
- vp->v_type == VLNK || vp->v_type == VFIFO) {+ case VREG:
+ case VDIR:
+ case VLNK:
+ case VFIFO:
+ case VSOCK:
mp = vp->v_mount; mp = vp->v_mount;
- } else {+ break;
- return (NULL);+ case VBLK:
+ vn_printf(vp, "softdep_bp_to_mp: unexpected block device\n");
+ /* FALLTHROUGH */
+ case VNON:
+ case VBAD:
+ case VMARKER:
+ mp = NULL;
+ break;
+ default:
+ vn_printf(vp, "unknown vnode type");
+ mp = NULL;
+ break;
} }
return (VFSTOUFS(mp)); return (VFSTOUFS(mp));
} }
sys/vm/vm_fault.c
@@ -1757,8 +1757,7 @@ again:
} }
pmap_copy_page(src_m, dst_m); pmap_copy_page(src_m, dst_m);
VM_OBJECT_RUNLOCK(object); VM_OBJECT_RUNLOCK(object);
- dst_m->valid = VM_PAGE_BITS_ALL;+ dst_m->dirty = dst_m->valid = src_m->valid;
- dst_m->dirty = VM_PAGE_BITS_ALL;
} else { } else {
dst_m = src_m; dst_m = src_m;
if (vm_page_sleep_if_busy(dst_m, "fltupg")) if (vm_page_sleep_if_busy(dst_m, "fltupg"))
@@ -1771,8 +1770,6 @@ again:
*/ */
break; break;
vm_page_xbusy(dst_m); vm_page_xbusy(dst_m);
- KASSERT(dst_m->valid == VM_PAGE_BITS_ALL,
- ("invalid dst page %p", dst_m));
} }
VM_OBJECT_WUNLOCK(dst_object); VM_OBJECT_WUNLOCK(dst_object);
@@ -1780,9 +1777,18 @@ again:
* Enter it in the pmap. If a wired, copy-on-write * Enter it in the pmap. If a wired, copy-on-write
* mapping is being replaced by a write-enabled * mapping is being replaced by a write-enabled
* mapping, then wire that new mapping. * mapping, then wire that new mapping.
+ *
+ * The page can be invalid if the user called
+ * msync(MS_INVALIDATE) or truncated the backing vnode
+ * or shared memory object. In this case, do not
+ * insert it into pmap, but still do the copy so that
+ * all copies of the wired map entry have similar
+ * backing pages.
*/ */
- pmap_enter(dst_map->pmap, vaddr, dst_m, prot,+ if (dst_m->valid == VM_PAGE_BITS_ALL) {
- access | (upgrade ? PMAP_ENTER_WIRED : 0), 0);+ pmap_enter(dst_map->pmap, vaddr, dst_m, prot,
+ access | (upgrade ? PMAP_ENTER_WIRED : 0), 0);
+ }
/* /*
* Mark it no longer busy, and put it on the active list. * Mark it no longer busy, and put it on the active list.
tests/sys/netmap/ctrl-api-test.c
@@ -146,12 +146,12 @@ struct TestContext {
uint32_t nr_hdr_len; /* for PORT_HDR_SET and PORT_HDR_GET */ uint32_t nr_hdr_len; /* for PORT_HDR_SET and PORT_HDR_GET */
uint32_t nr_first_cpu_id; /* vale polling */ uint32_t nr_first_cpu_id; /* vale polling */
uint32_t nr_num_polling_cpus; /* vale polling */ uint32_t nr_num_polling_cpus; /* vale polling */
+ uint32_t sync_kloop_mode; /* sync-kloop */
int fd; /* netmap file descriptor */ int fd; /* netmap file descriptor */
void *csb; /* CSB entries (atok and ktoa) */ void *csb; /* CSB entries (atok and ktoa) */
struct nmreq_option *nr_opt; /* list of options */ struct nmreq_option *nr_opt; /* list of options */
sem_t *sem; /* for thread synchronization */ sem_t *sem; /* for thread synchronization */
- struct nmport_d *nmport; /* nmport descriptor from libnetmap */
}; };
static struct TestContext ctx_; static struct TestContext ctx_;
@@ -352,8 +352,11 @@ niocregif(struct TestContext *ctx, int netmap_api)
/* The 11 ABI is the one right before the introduction of the new NIOCCTRL /* The 11 ABI is the one right before the introduction of the new NIOCCTRL
* ABI. The 11 ABI is useful to perform tests with legacy applications * ABI. The 11 ABI is useful to perform tests with legacy applications
- * (which use the 11 ABI) and new kernel (which uses 12, or higher). */+ * (which use the 11 ABI) and new kernel (which uses 12, or higher).
-#define NETMAP_API_NIOCREGIF 11+ * However, version 14 introduced a change in the layout of struct netmap_if,
+ * so that binary backward compatibility to 11 is not supported anymore.
+ */
+#define NETMAP_API_NIOCREGIF 14
static int static int
legacy_regif_default(struct TestContext *ctx) legacy_regif_default(struct TestContext *ctx)
@@ -1113,7 +1116,7 @@ bad_extmem_option(struct TestContext *ctx)
pools_info_fill(&pools_info); pools_info_fill(&pools_info);
/* Request a large ring size, to make sure that the kernel /* Request a large ring size, to make sure that the kernel
* rejects our request. */ * rejects our request. */
- pools_info.nr_ring_pool_objsize = (1 << 16);+ pools_info.nr_ring_pool_objsize = (1 << 20);
return _extmem_option(ctx, &pools_info) < 0 ? 0 : -1; return _extmem_option(ctx, &pools_info) < 0 ? 0 : -1;
} }
@@ -1140,6 +1143,10 @@ duplicate_extmem_options(struct TestContext *ctx)
save1 = e1; save1 = e1;
save2 = e2; save2 = e2;
+ strncpy(ctx->ifname_ext, "vale0:0", sizeof(ctx->ifname_ext));
+ ctx->nr_tx_slots = 16;
+ ctx->nr_rx_slots = 16;
+
ret = port_register_hwall(ctx); ret = port_register_hwall(ctx);
if (ret >= 0) { if (ret >= 0) {
printf("duplicate option not detected\n"); printf("duplicate option not detected\n");
@@ -1322,51 +1329,58 @@ sync_kloop(struct TestContext *ctx)
static int static int
sync_kloop_eventfds(struct TestContext *ctx) sync_kloop_eventfds(struct TestContext *ctx)
{ {
- struct nmreq_opt_sync_kloop_eventfds *opt = NULL;+ struct nmreq_opt_sync_kloop_eventfds *evopt = NULL;
- struct nmreq_option save;+ struct nmreq_opt_sync_kloop_mode modeopt;
+ struct nmreq_option evsave;
int num_entries; int num_entries;
size_t opt_size; size_t opt_size;
int ret, i; int ret, i;
+ memset(&modeopt, 0, sizeof(modeopt));
+ modeopt.nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_MODE;
+ modeopt.mode = ctx->sync_kloop_mode;
+ push_option(&modeopt.nro_opt, ctx);
+
num_entries = num_registered_rings(ctx); num_entries = num_registered_rings(ctx);
- opt_size = sizeof(*opt) + num_entries * sizeof(opt->eventfds[0]);+ opt_size = sizeof(*evopt) + num_entries * sizeof(evopt->eventfds[0]);
- opt = calloc(1, opt_size);+ evopt = calloc(1, opt_size);
- opt->nro_opt.nro_next = 0;+ evopt->nro_opt.nro_next = 0;
- opt->nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS;+ evopt->nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS;
- opt->nro_opt.nro_status = 0;+ evopt->nro_opt.nro_status = 0;
- opt->nro_opt.nro_size = opt_size;+ evopt->nro_opt.nro_size = opt_size;
for (i = 0; i < num_entries; i++) { for (i = 0; i < num_entries; i++) {
int efd = eventfd(0, 0); int efd = eventfd(0, 0);
- opt->eventfds[i].ioeventfd = efd;+ evopt->eventfds[i].ioeventfd = efd;
efd = eventfd(0, 0); efd = eventfd(0, 0);
- opt->eventfds[i].irqfd = efd;+ evopt->eventfds[i].irqfd = efd;
} }
- push_option(&opt->nro_opt, ctx);+ push_option(&evopt->nro_opt, ctx);
- save = opt->nro_opt;+ evsave = evopt->nro_opt;
ret = sync_kloop_start_stop(ctx); ret = sync_kloop_start_stop(ctx);
if (ret != 0) { if (ret != 0) {
- free(opt);+ free(evopt);
clear_options(ctx); clear_options(ctx);
return ret; return ret;
} }
#ifdef __linux__ #ifdef __linux__
- save.nro_status = 0;+ evsave.nro_status = 0;
#else /* !__linux__ */ #else /* !__linux__ */
- save.nro_status = EOPNOTSUPP;+ evsave.nro_status = EOPNOTSUPP;
#endif /* !__linux__ */ #endif /* !__linux__ */
- ret = checkoption(&opt->nro_opt, &save);+ ret = checkoption(&evopt->nro_opt, &evsave);
- free(opt);+ free(evopt);
clear_options(ctx); clear_options(ctx);
return ret; return ret;
} }
static int static int
-sync_kloop_eventfds_all(struct TestContext *ctx)+sync_kloop_eventfds_all_mode(struct TestContext *ctx,
+ uint32_t sync_kloop_mode)
{ {
int ret; int ret;
@@ -1375,9 +1389,17 @@ sync_kloop_eventfds_all(struct TestContext *ctx)
return ret; return ret;
} }
+ ctx->sync_kloop_mode = sync_kloop_mode;
+
return sync_kloop_eventfds(ctx); return sync_kloop_eventfds(ctx);
} }
+static int
+sync_kloop_eventfds_all(struct TestContext *ctx)
+{
+ return sync_kloop_eventfds_all_mode(ctx, 0);
+}
+
static int static int
sync_kloop_eventfds_all_tx(struct TestContext *ctx) sync_kloop_eventfds_all_tx(struct TestContext *ctx)
{ {
@@ -1398,6 +1420,27 @@ sync_kloop_eventfds_all_tx(struct TestContext *ctx)
return sync_kloop_eventfds(ctx); return sync_kloop_eventfds(ctx);
} }
+static int
+sync_kloop_eventfds_all_direct(struct TestContext *ctx)
+{
+ return sync_kloop_eventfds_all_mode(ctx,
+ NM_OPT_SYNC_KLOOP_DIRECT_TX | NM_OPT_SYNC_KLOOP_DIRECT_RX);
+}
+
+static int
+sync_kloop_eventfds_all_direct_tx(struct TestContext *ctx)
+{
+ return sync_kloop_eventfds_all_mode(ctx,
+ NM_OPT_SYNC_KLOOP_DIRECT_TX);
+}
+
+static int
+sync_kloop_eventfds_all_direct_rx(struct TestContext *ctx)
+{
+ return sync_kloop_eventfds_all_mode(ctx,
+ NM_OPT_SYNC_KLOOP_DIRECT_RX);
+}
+
static int static int
sync_kloop_nocsb(struct TestContext *ctx) sync_kloop_nocsb(struct TestContext *ctx)
{ {
@@ -1677,6 +1720,9 @@ static struct mytest tests[] = {
decltest(sync_kloop), decltest(sync_kloop),
decltest(sync_kloop_eventfds_all), decltest(sync_kloop_eventfds_all),
decltest(sync_kloop_eventfds_all_tx), decltest(sync_kloop_eventfds_all_tx),
+ decltest(sync_kloop_eventfds_all_direct),
+ decltest(sync_kloop_eventfds_all_direct_tx),
+ decltest(sync_kloop_eventfds_all_direct_rx),
decltest(sync_kloop_nocsb), decltest(sync_kloop_nocsb),
decltest(sync_kloop_csb_enable), decltest(sync_kloop_csb_enable),
decltest(sync_kloop_conflict), decltest(sync_kloop_conflict),
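A condensed sketch of how the new kloop mode option is assembled, mirroring the test code above. The struct and constant names come from the diff; the header path, and whether both "direct" flags are meant to be combined outside the test, are assumptions.

	#include <string.h>

	#include <net/netmap.h>		/* nmreq_opt_sync_kloop_mode et al. (assumed) */

	/* Fill in a request option asking for both "direct" kloop modes. */
	static void
	prepare_direct_kloop_mode(struct nmreq_opt_sync_kloop_mode *modeopt)
	{
		memset(modeopt, 0, sizeof(*modeopt));
		modeopt->nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_MODE;
		modeopt->mode = NM_OPT_SYNC_KLOOP_DIRECT_TX |
		    NM_OPT_SYNC_KLOOP_DIRECT_RX;
		/* The caller links nro_opt into the request's option list. */
	}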
usr.bin/lockf/lockf.c
@@ -174,6 +174,8 @@ acquire_lock(const char *name, int flags)
if ((fd = open(name, O_RDONLY|O_EXLOCK|flags, 0666)) == -1) { if ((fd = open(name, O_RDONLY|O_EXLOCK|flags, 0666)) == -1) {
if (errno == EAGAIN || errno == EINTR) if (errno == EAGAIN || errno == EINTR)
return (-1); return (-1);
+ else if (errno == ENOENT && (flags & O_CREAT) == 0)
+ err(EX_UNAVAILABLE, "%s", name);
err(EX_CANTCREAT, "cannot open %s", name); err(EX_CANTCREAT, "cannot open %s", name);
} }
return (fd); return (fd);
usr.sbin/makefs/mtree.c
@@ -644,14 +644,17 @@ read_mtree_keywords(FILE *fp, fsnode *node)
st->st_atime = num; st->st_atime = num;
st->st_ctime = num; st->st_ctime = num;
st->st_mtime = num; st->st_mtime = num;
+#if HAVE_STRUCT_STAT_ST_MTIMENSEC
if (p == NULL) if (p == NULL)
break; break;
error = read_number(p, 10, &num, 0, error = read_number(p, 10, &num, 0,
INTMAX_MAX); INTMAX_MAX);
if (error) if (error)
break; break;
- if (num != 0)+ st->st_atimensec = num;
- error = EINVAL;+ st->st_ctimensec = num;
+ st->st_mtimensec = num;
+#endif
} else if (strcmp(keyword, "type") == 0) { } else if (strcmp(keyword, "type") == 0) {
if (value == NULL) { if (value == NULL) {
error = ENOATTR; error = ENOATTR;
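With this change read_mtree_keywords() stores the fractional part of the time keyword into the st_*timensec fields (when struct stat provides them) instead of flagging any non-zero fraction as EINVAL. A spec entry using the seconds.nanoseconds form would look roughly like this (path and values invented for illustration):

	./etc/rc type=file uid=0 gid=0 mode=0644 size=123 time=1446458503.000000000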
usr.sbin/makefs/tests/makefs_tests_common.sh
@@ -29,13 +29,7 @@
KB=1024 KB=1024
: ${TMPDIR=/tmp} : ${TMPDIR=/tmp}
-# TODO: add mtree `time` support; get a lot of errors like this right now when+DEFAULT_MTREE_KEYWORDS="type,mode,gid,uid,size,link,time"
-# passing generating disk images with keyword mtree support, like:
-#
-# `[...]/mtree.spec:8: error: time: invalid value '1446458503'`
-#
-#DEFAULT_MTREE_KEYWORDS="type,mode,gid,uid,size,link,time"
-DEFAULT_MTREE_KEYWORDS="type,mode,gid,uid,size,link"
TEST_IMAGE="$TMPDIR/test.img" TEST_IMAGE="$TMPDIR/test.img"
TEST_INPUTS_DIR="$TMPDIR/inputs" TEST_INPUTS_DIR="$TMPDIR/inputs"
TEST_MD_DEVICE_FILE="$TMPDIR/md.output" TEST_MD_DEVICE_FILE="$TMPDIR/md.output"