Commit 6787dc24 authored by Linus Torvalds

Merge tag '4.16-rc-SMB3' of git://git.samba.org/sfrench/cifs-2.6

Pull cifs updates from Steve French:
 "Some fixes for stable, fixed SMB3 DFS support, SMB3 Direct (RDMA) and
  various bug fixes and cleanup"

* tag '4.16-rc-SMB3' of git://git.samba.org/sfrench/cifs-2.6: (60 commits)
  fs/cifs/cifsacl.c Fixes typo in a comment
  update internal version number for cifs.ko
  cifs: add .splice_write
  CIFS: document tcon/ses/server refcount dance
  move a few externs to smbdirect.h to eliminate warning
  CIFS: zero sensitive data when freeing
  Cleanup some minor endian issues in smb3 rdma
  CIFS: dump IPC tcon in debug proc file
  CIFS: use tcon_ipc instead of use_ipc parameter of SMB2_ioctl
  CIFS: make IPC a regular tcon
  cifs: remove redundant duplicated assignment of pointer 'node'
  CIFS: SMBD: work around gcc -Wmaybe-uninitialized warning
  cifs: Fix autonegotiate security settings mismatch
  CIFS: SMBD: _smbd_get_connection() can be static
  CIFS: SMBD: Disable signing on SMB direct transport
  CIFS: SMBD: Add SMB Direct debug counters
  CIFS: SMBD: Upper layer performs SMB read via RDMA write through memory registration
  CIFS: SMBD: Read correct returned data length for RDMA write (SMB read) I/O
  CIFS: SMBD: Upper layer performs SMB write via RDMA read through memory registration
  CIFS: SMBD: Implement RDMA memory registration
  ...
@@ -196,6 +196,14 @@ config CIFS_SMB311
	  This dialect includes improved security negotiation features.
	  If unsure, say N
config CIFS_SMB_DIRECT
bool "SMB Direct support (Experimental)"
depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
help
Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
SMB Direct allows transferring SMB packets over RDMA. If unsure,
say N.
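For reference, the depends expression above allows SMB Direct when cifs is built as a module with InfiniBand support enabled (y or m), or when both are built in. A hypothetical .config fragment that satisfies it:
CONFIG_CIFS=m
CONFIG_INFINIBAND=m
CONFIG_CIFS_SMB_DIRECT=y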
config CIFS_FSCACHE
	bool "Provide CIFS client caching support"
	depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
......
@@ -19,3 +19,5 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
@@ -30,6 +30,9 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
#ifdef CONFIG_CIFS_SMB_DIRECT
#include "smbdirect.h"
#endif
void
cifs_dump_mem(char *label, void *data, int length)
@@ -107,6 +110,32 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
}
#ifdef CONFIG_PROC_FS
static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
{
__u32 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
seq_printf(m, "%s Mounts: %d ", tcon->treeName, tcon->tc_count);
if (tcon->nativeFileSystem)
seq_printf(m, "Type: %s ", tcon->nativeFileSystem);
seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x\n\tPathComponentMax: %d Status: %d",
le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
le32_to_cpu(tcon->fsAttrInfo.Attributes),
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
tcon->tidStatus);
if (dev_type == FILE_DEVICE_DISK)
seq_puts(m, " type: DISK ");
else if (dev_type == FILE_DEVICE_CD_ROM)
seq_puts(m, " type: CDROM ");
else
seq_printf(m, " type: %d ", dev_type);
if (tcon->ses->server->ops->dump_share_caps)
tcon->ses->server->ops->dump_share_caps(m, tcon);
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
seq_putc(m, '\n');
}
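For illustration, a share entry rendered by this helper in /proc/fs/cifs/DebugData would look roughly like the following (all values hypothetical, derived from the format strings above):
\\server\share Mounts: 1 Type: NTFS DevInfo: 0x20 Attributes: 0x1006f
	PathComponentMax: 255 Status: 1 type: DISK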
static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
{
struct list_head *tmp1, *tmp2, *tmp3;
@@ -115,7 +144,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
int i, j;
__u32 dev_type;
seq_puts(m,
"Display Internal CIFS Data Structures for Debugging\n"
@@ -152,6 +180,72 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (!server->rdma)
goto skip_rdma;
seq_printf(m, "\nSMBDirect (in hex) protocol version: %x "
"transport status: %x",
server->smbd_conn->protocol,
server->smbd_conn->transport_status);
seq_printf(m, "\nConn receive_credit_max: %x "
"send_credit_target: %x max_send_size: %x",
server->smbd_conn->receive_credit_max,
server->smbd_conn->send_credit_target,
server->smbd_conn->max_send_size);
seq_printf(m, "\nConn max_fragmented_recv_size: %x "
"max_fragmented_send_size: %x max_receive_size:%x",
server->smbd_conn->max_fragmented_recv_size,
server->smbd_conn->max_fragmented_send_size,
server->smbd_conn->max_receive_size);
seq_printf(m, "\nConn keep_alive_interval: %x "
"max_readwrite_size: %x rdma_readwrite_threshold: %x",
server->smbd_conn->keep_alive_interval,
server->smbd_conn->max_readwrite_size,
server->smbd_conn->rdma_readwrite_threshold);
seq_printf(m, "\nDebug count_get_receive_buffer: %x "
"count_put_receive_buffer: %x count_send_empty: %x",
server->smbd_conn->count_get_receive_buffer,
server->smbd_conn->count_put_receive_buffer,
server->smbd_conn->count_send_empty);
seq_printf(m, "\nRead Queue count_reassembly_queue: %x "
"count_enqueue_reassembly_queue: %x "
"count_dequeue_reassembly_queue: %x "
"fragment_reassembly_remaining: %x "
"reassembly_data_length: %x "
"reassembly_queue_length: %x",
server->smbd_conn->count_reassembly_queue,
server->smbd_conn->count_enqueue_reassembly_queue,
server->smbd_conn->count_dequeue_reassembly_queue,
server->smbd_conn->fragment_reassembly_remaining,
server->smbd_conn->reassembly_data_length,
server->smbd_conn->reassembly_queue_length);
seq_printf(m, "\nCurrent Credits send_credits: %x "
"receive_credits: %x receive_credit_target: %x",
atomic_read(&server->smbd_conn->send_credits),
atomic_read(&server->smbd_conn->receive_credits),
server->smbd_conn->receive_credit_target);
seq_printf(m, "\nPending send_pending: %x send_payload_pending:"
" %x smbd_send_pending: %x smbd_recv_pending: %x",
atomic_read(&server->smbd_conn->send_pending),
atomic_read(&server->smbd_conn->send_payload_pending),
server->smbd_conn->smbd_send_pending,
server->smbd_conn->smbd_recv_pending);
seq_printf(m, "\nReceive buffers count_receive_queue: %x "
"count_empty_packet_queue: %x",
server->smbd_conn->count_receive_queue,
server->smbd_conn->count_empty_packet_queue);
seq_printf(m, "\nMR responder_resources: %x "
"max_frmr_depth: %x mr_type: %x",
server->smbd_conn->responder_resources,
server->smbd_conn->max_frmr_depth,
server->smbd_conn->mr_type);
seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x",
atomic_read(&server->smbd_conn->mr_ready_count),
atomic_read(&server->smbd_conn->mr_used_count));
skip_rdma:
#endif
seq_printf(m, "\nNumber of credits: %d", server->credits); seq_printf(m, "\nNumber of credits: %d", server->credits);
i++; i++;
list_for_each(tmp2, &server->smb_ses_list) { list_for_each(tmp2, &server->smb_ses_list) {
...@@ -176,6 +270,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ...@@ -176,6 +270,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
ses->ses_count, ses->serverOS, ses->serverNOS, ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status); ses->capabilities, ses->status);
} }
if (server->rdma)
seq_printf(m, "RDMA\n\t");
seq_printf(m, "TCP status: %d\n\tLocal Users To " seq_printf(m, "TCP status: %d\n\tLocal Users To "
"Server: %d SecMode: 0x%x Req On Wire: %d", "Server: %d SecMode: 0x%x Req On Wire: %d",
server->tcpStatus, server->srv_count, server->tcpStatus, server->srv_count,
...@@ -189,35 +285,19 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ...@@ -189,35 +285,19 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_puts(m, "\n\tShares:"); seq_puts(m, "\n\tShares:");
j = 0; j = 0;
seq_printf(m, "\n\t%d) IPC: ", j);
if (ses->tcon_ipc)
cifs_debug_tcon(m, ses->tcon_ipc);
else
seq_puts(m, "none\n");
list_for_each(tmp3, &ses->tcon_list) {
tcon = list_entry(tmp3, struct cifs_tcon,
tcon_list);
++j;
dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
seq_printf(m, "\n\t%d) %s Mounts: %d ", j,
tcon->treeName, tcon->tc_count);
if (tcon->nativeFileSystem) {
seq_printf(m, "Type: %s ",
tcon->nativeFileSystem);
}
seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
"\n\tPathComponentMax: %d Status: %d",
le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
le32_to_cpu(tcon->fsAttrInfo.Attributes),
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
tcon->tidStatus);
if (dev_type == FILE_DEVICE_DISK)
seq_puts(m, " type: DISK ");
else if (dev_type == FILE_DEVICE_CD_ROM)
seq_puts(m, " type: CDROM ");
else
seq_printf(m, " type: %d ", dev_type);
if (server->ops->dump_share_caps)
server->ops->dump_share_caps(m, tcon);
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
seq_putc(m, '\n');
seq_printf(m, "\n\t%d) ", j);
cifs_debug_tcon(m, tcon);
}
seq_puts(m, "\n\tMIDs:\n");
@@ -374,6 +454,45 @@ static const struct file_operations cifs_stats_proc_fops = {
};
#endif /* STATS */
#ifdef CONFIG_CIFS_SMB_DIRECT
#define PROC_FILE_DEFINE(name) \
static ssize_t name##_write(struct file *file, const char __user *buffer, \
size_t count, loff_t *ppos) \
{ \
int rc; \
rc = kstrtoint_from_user(buffer, count, 10, & name); \
if (rc) \
return rc; \
return count; \
} \
static int name##_proc_show(struct seq_file *m, void *v) \
{ \
seq_printf(m, "%d\n", name ); \
return 0; \
} \
static int name##_open(struct inode *inode, struct file *file) \
{ \
return single_open(file, name##_proc_show, NULL); \
} \
\
static const struct file_operations cifs_##name##_proc_fops = { \
.open = name##_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release, \
.write = name##_write, \
}
PROC_FILE_DEFINE(rdma_readwrite_threshold);
PROC_FILE_DEFINE(smbd_max_frmr_depth);
PROC_FILE_DEFINE(smbd_keep_alive_interval);
PROC_FILE_DEFINE(smbd_max_receive_size);
PROC_FILE_DEFINE(smbd_max_fragmented_recv_size);
PROC_FILE_DEFINE(smbd_max_send_size);
PROC_FILE_DEFINE(smbd_send_credit_target);
PROC_FILE_DEFINE(smbd_receive_credit_max);
#endif
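For clarity, this is approximately what a single invocation such as PROC_FILE_DEFINE(rdma_readwrite_threshold) expands to after preprocessing; a sketch, not additional patch content:

static ssize_t rdma_readwrite_threshold_write(struct file *file,
		const char __user *buffer, size_t count, loff_t *ppos)
{
	int rc;

	/* parse a base-10 integer from user space into the tunable */
	rc = kstrtoint_from_user(buffer, count, 10, &rdma_readwrite_threshold);
	if (rc)
		return rc;
	return count;
}

static int rdma_readwrite_threshold_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", rdma_readwrite_threshold);
	return 0;
}

static int rdma_readwrite_threshold_open(struct inode *inode, struct file *file)
{
	return single_open(file, rdma_readwrite_threshold_proc_show, NULL);
}

static const struct file_operations cifs_rdma_readwrite_threshold_proc_fops = {
	.open = rdma_readwrite_threshold_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.write = rdma_readwrite_threshold_write,
};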
static struct proc_dir_entry *proc_fs_cifs;
static const struct file_operations cifsFYI_proc_fops;
static const struct file_operations cifs_lookup_cache_proc_fops;
@@ -401,6 +520,24 @@ cifs_proc_init(void)
&cifs_security_flags_proc_fops);
proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
&cifs_lookup_cache_proc_fops);
#ifdef CONFIG_CIFS_SMB_DIRECT
proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs,
&cifs_rdma_readwrite_threshold_proc_fops);
proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs,
&cifs_smbd_max_frmr_depth_proc_fops);
proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs,
&cifs_smbd_keep_alive_interval_proc_fops);
proc_create("smbd_max_receive_size", 0, proc_fs_cifs,
&cifs_smbd_max_receive_size_proc_fops);
proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs,
&cifs_smbd_max_fragmented_recv_size_proc_fops);
proc_create("smbd_max_send_size", 0, proc_fs_cifs,
&cifs_smbd_max_send_size_proc_fops);
proc_create("smbd_send_credit_target", 0, proc_fs_cifs,
&cifs_smbd_send_credit_target_proc_fops);
proc_create("smbd_receive_credit_max", 0, proc_fs_cifs,
&cifs_smbd_receive_credit_max_proc_fops);
#endif
}
void
@@ -418,6 +555,16 @@ cifs_proc_clean(void)
remove_proc_entry("SecurityFlags", proc_fs_cifs);
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
#ifdef CONFIG_CIFS_SMB_DIRECT
remove_proc_entry("rdma_readwrite_threshold", proc_fs_cifs);
remove_proc_entry("smbd_max_frmr_depth", proc_fs_cifs);
remove_proc_entry("smbd_keep_alive_interval", proc_fs_cifs);
remove_proc_entry("smbd_max_receive_size", proc_fs_cifs);
remove_proc_entry("smbd_max_fragmented_recv_size", proc_fs_cifs);
remove_proc_entry("smbd_max_send_size", proc_fs_cifs);
remove_proc_entry("smbd_send_credit_target", proc_fs_cifs);
remove_proc_entry("smbd_receive_credit_max", proc_fs_cifs);
#endif
remove_proc_entry("fs/cifs", NULL); remove_proc_entry("fs/cifs", NULL);
} }
......
@@ -1125,7 +1125,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
return rc;
}
/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
/* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */
int
cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
struct inode *inode, const char *path,
......
@@ -325,9 +325,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
{
int i;
int rc;
char password_with_pad[CIFS_ENCPWD_SIZE];
memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
if (password)
strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
......
@@ -327,6 +327,8 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
default:
seq_puts(s, "(unknown)");
}
if (server->rdma)
seq_puts(s, ",rdma");
}
static void
@@ -1068,6 +1070,7 @@ const struct file_operations cifs_file_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1086,6 +1089,7 @@ const struct file_operations cifs_file_strict_ops = {
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1105,6 +1109,7 @@ const struct file_operations cifs_file_direct_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
@@ -1122,6 +1127,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1139,6 +1145,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1157,6 +1164,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
......
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
#define CIFS_VERSION "2.10"
#define CIFS_VERSION "2.11"
#endif /* _CIFSFS_H */
@@ -64,8 +64,8 @@
#define RFC1001_NAME_LEN 15
#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
/* currently length of NIP6_FMT */
#define SERVER_NAME_LENGTH 40
/* maximum length of ip addr as a string (including ipv6 and sctp) */
#define SERVER_NAME_LENGTH 80
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
/* echo interval in seconds */
@@ -230,8 +230,14 @@ struct smb_version_operations {
__u64 (*get_next_mid)(struct TCP_Server_Info *);
/* data offset from read response message */
unsigned int (*read_data_offset)(char *);
/* data length from read response message */
unsigned int (*read_data_length)(char *);
/*
 * Data length from read response message
 * When in_remaining is true, the returned data length is in
 * message field DataRemaining for out-of-band data read (e.g through
 * Memory Registration RDMA write in SMBD).
 * Otherwise, the returned data length is in message field DataLength.
 */
unsigned int (*read_data_length)(char *, bool in_remaining);
/* map smb to linux error */
int (*map_error)(char *, bool);
/* find mid corresponding to the response message */
@@ -532,6 +538,7 @@ struct smb_vol {
bool nopersistent:1;
bool resilient:1; /* noresilient not required since not fored for CA */
bool domainauto:1;
bool rdma:1;
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -648,6 +655,10 @@ struct TCP_Server_Info {
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool large_buf; /* is current buffer large? */
/* use SMBD connection instead of socket */
bool rdma;
/* point to the SMBD connection if RDMA is used instead of socket */
struct smbd_connection *smbd_conn;
struct delayed_work echo; /* echo ping workqueue job */
char *smallbuf; /* pointer to current "small" buffer */
char *bigbuf; /* pointer to current "big" buffer */
@@ -822,12 +833,12 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
struct cifs_ses {
struct list_head smb_ses_list;
struct list_head tcon_list;
struct cifs_tcon *tcon_ipc;
struct mutex session_mutex;
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
enum statusEnum status;
unsigned overrideSecFlg; /* if non-zero override global sec flags */
__u32 ipc_tid; /* special tid for connection to IPC share */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
@@ -835,8 +846,7 @@ struct cifs_ses {
kuid_t linux_uid; /* overriding owner of files on the mount */
kuid_t cred_uid; /* owner of credentials */
unsigned int capabilities;
char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
TCP names - will ipv6 and sctp addresses fit? */
char serverName[SERVER_NAME_LEN_WITH_NULL];
char *user_name; /* must not be null except during init of sess
and after mount option parsing we fill it */
char *domainName;
@@ -931,7 +941,9 @@ struct cifs_tcon {
FILE_SYSTEM_DEVICE_INFO fsDevInfo;
FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
FILE_SYSTEM_UNIX_INFO fsUnixInfo;
bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */
bool ipc:1; /* set if connection to IPC$ share (always also pipe) */
bool pipe:1; /* set if connection to pipe share */
bool print:1; /* set if connection to printer share */
bool retry:1;
bool nocase:1;
bool seal:1; /* transport encryption for this mounted share */
@@ -944,7 +956,6 @@ struct cifs_tcon {
bool need_reopen_files:1; /* need to reopen tcon file handles */
bool use_resilient:1; /* use resilient instead of durable handles */
bool use_persistent:1; /* use persistent instead of durable handles */
bool print:1; /* set if connection to printer share */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
@@ -1147,6 +1158,9 @@ struct cifs_readdata {
struct cifs_readdata *rdata,
struct iov_iter *iter);
struct kvec iov[2];
#ifdef CONFIG_CIFS_SMB_DIRECT
struct smbd_mr *mr;
#endif
unsigned int pagesz;
unsigned int tailsz;
unsigned int credits;
@@ -1169,6 +1183,9 @@ struct cifs_writedata {
pid_t pid;
unsigned int bytes;
int result;
#ifdef CONFIG_CIFS_SMB_DIRECT
struct smbd_mr *mr;
#endif
unsigned int pagesz;
unsigned int tailsz;
unsigned int credits;
......
@@ -106,6 +106,10 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */, const int flags,
struct kvec * /* resp vec */);
extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *pses,
struct kvec *pkvec, int nvec_to_send,
int *pbuftype, const int flags,
struct kvec *presp);
extern int SendReceiveBlockingLock(const unsigned int xid,
struct cifs_tcon *ptcon,
struct smb_hdr *in_buf ,
......
@@ -43,6 +43,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "fscache.h"
#include "smbdirect.h"
#ifdef CONFIG_CIFS_POSIX
static struct {
@@ -1454,6 +1455,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct cifs_readdata *rdata = mid->callback_data;
char *buf = server->smallbuf;
unsigned int buflen = get_rfc1002_length(buf) + 4;
bool use_rdma_mr = false;
cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
__func__, mid->mid, rdata->offset, rdata->bytes); __func__, mid->mid, rdata->offset, rdata->bytes);
...@@ -1542,8 +1544,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) ...@@ -1542,8 +1544,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
rdata->iov[0].iov_base, server->total_read); rdata->iov[0].iov_base, server->total_read);
/* how much data is in the response? */
data_len = server->ops->read_data_length(buf);
if (data_offset + data_len > buflen) {
#ifdef CONFIG_CIFS_SMB_DIRECT
use_rdma_mr = rdata->mr;
#endif
data_len = server->ops->read_data_length(buf, use_rdma_mr);
if (!use_rdma_mr && (data_offset + data_len > buflen)) {
/* data_len is corrupt -- discard frame */
rdata->result = -EIO;
return cifs_readv_discard(server, mid);
@@ -1923,6 +1928,12 @@ cifs_writedata_release(struct kref *refcount)
{
struct cifs_writedata *wdata = container_of(refcount,
struct cifs_writedata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
smbd_deregister_mr(wdata->mr);
wdata->mr = NULL;
}
#endif
if (wdata->cfile)
cifsFileInfo_put(wdata->cfile);
@@ -4822,10 +4833,11 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
*target_nodes = NULL;
cifs_dbg(FYI, "In GetDFSRefer the path %s\n", search_name);
if (ses == NULL)
if (ses == NULL || ses->tcon_ipc == NULL)
return -ENODEV;
getDFSRetry:
rc = smb_init(SMB_COM_TRANSACTION2, 15, NULL, (void **) &pSMB,
rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
(void **) &pSMBr);
if (rc)
return rc;
@@ -4833,7 +4845,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
/* server pointer checked in called function,
but should never be null here anyway */
pSMB->hdr.Mid = get_next_mid(ses->server);
pSMB->hdr.Tid = ses->ipc_tid;
pSMB->hdr.Tid = ses->tcon_ipc->tid;
pSMB->hdr.Uid = ses->Suid;
if (ses->capabilities & CAP_STATUS32)
pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
......
@@ -44,7 +44,6 @@
#include <net/ipv6.h>
#include <linux/parser.h>
#include <linux/bvec.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
@@ -56,6 +55,7 @@
#include "rfc1002pdu.h"
#include "fscache.h"
#include "smb2proto.h"
#include "smbdirect.h"
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -92,7 +92,7 @@ enum {
Opt_multiuser, Opt_sloppy, Opt_nosharesock,
Opt_persistent, Opt_nopersistent,
Opt_resilient, Opt_noresilient,
Opt_domainauto,
Opt_domainauto, Opt_rdma,
/* Mount options which take numeric value */
Opt_backupuid, Opt_backupgid, Opt_uid,
@@ -183,6 +183,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_resilient, "resilienthandles"},
{ Opt_noresilient, "noresilienthandles"},
{ Opt_domainauto, "domainauto"},
{ Opt_rdma, "rdma"},
{ Opt_backupuid, "backupuid=%s" },
{ Opt_backupgid, "backupgid=%s" },
@@ -353,11 +354,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
list_for_each(tmp, &server->smb_ses_list) {
ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
ses->need_reconnect = true;
ses->ipc_tid = 0;
list_for_each(tmp2, &ses->tcon_list) {
tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
tcon->need_reconnect = true;
}
if (ses->tcon_ipc)
ses->tcon_ipc->need_reconnect = true;
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -405,6 +407,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
/* we should try only the port we connected to before */
mutex_lock(&server->srv_mutex);
if (cifs_rdma_enabled(server))
rc = smbd_reconnect(server);
else
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
@@ -538,7 +543,9 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (server_unresponsive(server))
return -ECONNABORTED;
if (cifs_rdma_enabled(server) && server->smbd_conn)
length = smbd_recv(server->smbd_conn, smb_msg);
else
length = sock_recvmsg(server->ssocket, smb_msg, 0);
if (server->tcpStatus == CifsExiting)
@@ -700,7 +707,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
wake_up_all(&server->request_q);
/* give those requests time to exit */
msleep(125);
if (cifs_rdma_enabled(server) && server->smbd_conn) {
smbd_destroy(server->smbd_conn);
server->smbd_conn = NULL;
}
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
@@ -1550,6 +1560,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
case Opt_domainauto:
vol->domainauto = true;
break;
case Opt_rdma:
vol->rdma = true;
break;
/* Numeric Values */
case Opt_backupuid:
@@ -1707,7 +1720,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
tmp_end++;
if (!(tmp_end < end && tmp_end[1] == delim)) {
/* No it is not. Set the password to NULL */
kfree(vol->password);
kzfree(vol->password);
vol->password = NULL;
break;
}
@@ -1745,7 +1758,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
options = end;
}
kfree(vol->password);
kzfree(vol->password);
/* Now build new password string */
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
@@ -1951,6 +1964,19 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) {
cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
goto cifs_parse_mount_err;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
if (vol->rdma && vol->sign) {
cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
" This is being fixed\n");
goto cifs_parse_mount_err;
}
#endif
#ifndef CONFIG_KEYS
/* Muliuser mounts require CONFIG_KEYS support */
if (vol->multiuser) {
@@ -2162,6 +2188,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
if (server->echo_interval != vol->echo_interval * HZ)
return 0;
if (server->rdma != vol->rdma)
return 0;
return 1;
}
@@ -2260,6 +2289,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->noblocksnd = volume_info->noblocksnd;
tcp_ses->noautotune = volume_info->noautotune;
tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
tcp_ses->rdma = volume_info->rdma;
tcp_ses->in_flight = 0;
tcp_ses->credits = 1;
init_waitqueue_head(&tcp_ses->response_q);
@@ -2297,13 +2327,29 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->echo_interval = volume_info->echo_interval * HZ;
else
tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
if (tcp_ses->rdma) {
#ifndef CONFIG_CIFS_SMB_DIRECT
cifs_dbg(VFS, "CONFIG_CIFS_SMB_DIRECT is not enabled\n");
rc = -ENOENT;
goto out_err_crypto_release;
#endif
tcp_ses->smbd_conn = smbd_get_connection(
tcp_ses, (struct sockaddr *)&volume_info->dstaddr);
if (tcp_ses->smbd_conn) {
cifs_dbg(VFS, "RDMA transport established\n");
rc = 0;
goto smbd_connected;
} else {
rc = -ENOENT;
goto out_err_crypto_release;
}
}
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
goto out_err_crypto_release;
}
smbd_connected:
/*
* since we're in a cifs function already, we know that
* this will succeed. No need for try_module_get().
@@ -2381,6 +2427,93 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
return 1;
}
/**
* cifs_setup_ipc - helper to setup the IPC tcon for the session
*
* A new IPC connection is made and stored in the session
* tcon_ipc. The IPC tcon has the same lifetime as the session.
*/
static int
cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
{
int rc = 0, xid;
struct cifs_tcon *tcon;
struct nls_table *nls_codepage;
char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
bool seal = false;
/*
* If the mount request that resulted in the creation of the
* session requires encryption, force IPC to be encrypted too.
*/
if (volume_info->seal) {
if (ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)
seal = true;
else {
cifs_dbg(VFS,
"IPC: server doesn't support encryption\n");
return -EOPNOTSUPP;
}
}
tcon = tconInfoAlloc();
if (tcon == NULL)
return -ENOMEM;
snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->serverName);
/* cannot fail */
nls_codepage = load_nls_default();
xid = get_xid();
tcon->ses = ses;
tcon->ipc = true;
tcon->seal = seal;
rc = ses->server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage);
free_xid(xid);
if (rc) {
cifs_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
tconInfoFree(tcon);
goto out;
}
cifs_dbg(FYI, "IPC tcon rc = %d ipc tid = %d\n", rc, tcon->tid);
ses->tcon_ipc = tcon;
out:
unload_nls(nls_codepage);
return rc;
}
/**
* cifs_free_ipc - helper to release the session IPC tcon
*
* Needs to be called everytime a session is destroyed
*/
static int
cifs_free_ipc(struct cifs_ses *ses)
{
int rc = 0, xid;
struct cifs_tcon *tcon = ses->tcon_ipc;
if (tcon == NULL)
return 0;
if (ses->server->ops->tree_disconnect) {
xid = get_xid();
rc = ses->server->ops->tree_disconnect(xid, tcon);
free_xid(xid);
}
if (rc)
cifs_dbg(FYI, "failed to disconnect IPC tcon (rc=%d)\n", rc);
tconInfoFree(tcon);
ses->tcon_ipc = NULL;
return rc;
}
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
{
@@ -2421,6 +2554,8 @@ cifs_put_smb_ses(struct cifs_ses *ses)
ses->status = CifsExiting;
spin_unlock(&cifs_tcp_ses_lock);
cifs_free_ipc(ses);
if (ses->status == CifsExiting && server->ops->logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
@@ -2569,6 +2704,13 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
}
#endif /* CONFIG_KEYS */
/**
* cifs_get_smb_ses - get a session matching @volume_info data from @server
*
* This function assumes it is being called from cifs_mount() where we
* already got a server reference (server refcount +1). See
* cifs_get_tcon() for refcount explanations.
*/
static struct cifs_ses *
cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
{
@@ -2665,6 +2807,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
spin_unlock(&cifs_tcp_ses_lock);
free_xid(xid);
cifs_setup_ipc(ses, volume_info);
return ses;
get_ses_fail:
@@ -2709,8 +2854,16 @@ void
cifs_put_tcon(struct cifs_tcon *tcon)
{
unsigned int xid;
struct cifs_ses *ses = tcon->ses;
struct cifs_ses *ses;
/*
* IPC tcon share the lifetime of their session and are
* destroyed in the session put function
*/
if (tcon == NULL || tcon->ipc)
return;
ses = tcon->ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock); spin_lock(&cifs_tcp_ses_lock);
if (--tcon->tc_count > 0) { if (--tcon->tc_count > 0) {
...@@ -2731,6 +2884,26 @@ cifs_put_tcon(struct cifs_tcon *tcon) ...@@ -2731,6 +2884,26 @@ cifs_put_tcon(struct cifs_tcon *tcon)
cifs_put_smb_ses(ses); cifs_put_smb_ses(ses);
} }
/**
* cifs_get_tcon - get a tcon matching @volume_info data from @ses
*
* - tcon refcount is the number of mount points using the tcon.
* - ses refcount is the number of tcon using the session.
*
* 1. This function assumes it is being called from cifs_mount() where
* we already got a session reference (ses refcount +1).
*
* 2. Since we're in the context of adding a mount point, the end
* result should be either:
*
* a) a new tcon already allocated with refcount=1 (1 mount point) and
* its session refcount incremented (1 new tcon). This +1 was
* already done in (1).
*
* b) an existing tcon with refcount+1 (add a mount point to it) and
* identical ses refcount (no new tcon). Because of (1) we need to
* decrement the ses refcount.
*/
static struct cifs_tcon *
cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
{
@@ -2739,8 +2912,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon = cifs_find_tcon(ses, volume_info);
if (tcon) {
/*
* tcon has refcount already incremented but we need to
* decrement extra ses reference gotten by caller (case b)
*/
cifs_dbg(FYI, "Found match on UNC path\n"); cifs_dbg(FYI, "Found match on UNC path\n");
/* existing tcon already has a reference */
cifs_put_smb_ses(ses); cifs_put_smb_ses(ses);
return tcon; return tcon;
} }
...@@ -2986,39 +3162,17 @@ get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, ...@@ -2986,39 +3162,17 @@ get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path,
const struct nls_table *nls_codepage, unsigned int *num_referrals, const struct nls_table *nls_codepage, unsigned int *num_referrals,
struct dfs_info3_param **referrals, int remap) struct dfs_info3_param **referrals, int remap)
{ {
char *temp_unc;
int rc = 0;
if (!ses->server->ops->tree_connect || !ses->server->ops->get_dfs_refer)
if (!ses->server->ops->get_dfs_refer)
return -ENOSYS;
*num_referrals = 0;
*referrals = NULL;
if (ses->ipc_tid == 0) {
temp_unc = kmalloc(2 /* for slashes */ +
strnlen(ses->serverName, SERVER_NAME_LEN_WITH_NULL * 2)
+ 1 + 4 /* slash IPC$ */ + 2, GFP_KERNEL);
if (temp_unc == NULL)
return -ENOMEM;
temp_unc[0] = '\\';
temp_unc[1] = '\\';
strcpy(temp_unc + 2, ses->serverName);
strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$");
rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL,
nls_codepage);
cifs_dbg(FYI, "Tcon rc = %d ipc_tid = %d\n", rc, ses->ipc_tid);
kfree(temp_unc);
}
if (rc == 0)
rc = ses->server->ops->get_dfs_refer(xid, ses, old_path,
referrals, num_referrals,
nls_codepage, remap);
/*
* BB - map targetUNCs to dfs_info3 structures, here or in
* ses->server->ops->get_dfs_refer.
*/
return rc;
}
@@ -3783,7 +3937,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
tcon->unix_ext = 0; /* server does not support them */
/* do not care if a following call succeed - informational */
if (!tcon->ipc && server->ops->qfs_tcon)
if (!tcon->pipe && server->ops->qfs_tcon)
server->ops->qfs_tcon(xid, tcon);
cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
@@ -3913,8 +4067,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
}
/*
 * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon
 * pointer may be NULL.
 */
/*
 * Issue a TREE_CONNECT request.
 */
int
CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
@@ -3950,7 +4103,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
pSMB->AndXCommand = 0xFF;
pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
bcc_ptr = &pSMB->Password[0];
if (!tcon || (ses->server->sec_mode & SECMODE_USER)) {
if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) {
pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
*bcc_ptr = 0; /* password is null byte */
bcc_ptr++; /* skip password */
@@ -4022,7 +4175,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
0);
/* above now done in SendReceive */
if ((rc == 0) && (tcon != NULL)) {
if (rc == 0) {
bool is_unicode;
tcon->tidStatus = CifsGood;
@@ -4042,7 +4195,8 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
(bcc_ptr[2] == 'C')) {
cifs_dbg(FYI, "IPC connection\n");
tcon->ipc = 1;
tcon->ipc = true;
tcon->pipe = true;
}
} else if (length == 2) {
if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
@@ -4069,9 +4223,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
else
tcon->Flags = 0;
cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags);
} else if ((rc == 0) && tcon == NULL) {
/* all we need to save for IPC$ connection */
ses->ipc_tid = smb_buffer_response->Tid;
}
cifs_buf_release(smb_buffer);
@@ -4235,7 +4386,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
out:
kfree(vol_info->username);
kfree(vol_info->password);
kzfree(vol_info->password);
kfree(vol_info);
return tcon;
@@ -4387,7 +4538,7 @@ cifs_prune_tlinks(struct work_struct *work)
struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
prune_tlinks.work);
struct rb_root *root = &cifs_sb->tlink_tree;
struct rb_node *node = rb_first(root);
struct rb_node *node;
struct rb_node *tmp;
struct tcon_link *tlink;
......
@@ -42,7 +42,7 @@
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -2902,7 +2902,12 @@ cifs_readdata_release(struct kref *refcount)
{
struct cifs_readdata *rdata = container_of(refcount,
struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (rdata->mr) {
smbd_deregister_mr(rdata->mr);
rdata->mr = NULL;
}
#endif
if (rdata->cfile)
cifsFileInfo_put(rdata->cfile);
@@ -3031,6 +3036,10 @@ uncached_fill_pages(struct TCP_Server_Info *server,
}
if (iter)
result = copy_page_from_iter(page, 0, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
@@ -3471,20 +3480,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = {
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
int rc, xid;
struct inode *inode = file_inode(file);
xid = get_xid();
if (!CIFS_CACHE_READ(CIFS_I(inode))) {
rc = cifs_zap_mapping(inode);
if (rc)
return rc;
}
rc = generic_file_mmap(file, vma);
if (rc == 0)
vma->vm_ops = &cifs_file_vm_ops;
free_xid(xid);
return rc;
}
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
int xid, rc = 0;
struct inode *inode = file_inode(file);
xid = get_xid();
if (!CIFS_CACHE_READ(CIFS_I(inode)))
rc = cifs_zap_mapping(inode);
if (!rc)
rc = generic_file_mmap(file, vma);
if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
free_xid(xid);
return rc;
}
@@ -3494,16 +3501,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
int rc, xid;
xid = get_xid();
rc = cifs_revalidate_file(file);
if (rc) {
cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
rc);
free_xid(xid);
return rc;
}
rc = generic_file_mmap(file, vma);
if (rc == 0)
vma->vm_ops = &cifs_file_vm_ops;
free_xid(xid);
return rc;
}
int rc, xid;
xid = get_xid();
rc = cifs_revalidate_file(file);
if (rc)
cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
rc);
if (!rc)
rc = generic_file_mmap(file, vma);
if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
free_xid(xid);
return rc;
}
@@ -3600,6 +3607,10 @@ readpages_fill_pages(struct TCP_Server_Info *server,
if (iter)
result = copy_page_from_iter(page, 0, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
......
@@ -1049,7 +1049,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
tcon->resource_id = CIFS_I(inode)->uniqueid;
#endif
if (rc && tcon->ipc) {
if (rc && tcon->pipe) {
cifs_dbg(FYI, "ipc connection - fake read inode\n");
spin_lock(&inode->i_lock);
inode->i_mode |= S_IFDIR;
......
@@ -98,14 +98,11 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->serverOS);
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
if (buf_to_free->password) {
memset(buf_to_free->password, 0, strlen(buf_to_free->password));
kfree(buf_to_free->password);
}
kzfree(buf_to_free->password);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree(buf_to_free->auth_key.response);
kzfree(buf_to_free->auth_key.response);
kfree(buf_to_free);
kzfree(buf_to_free);
}
struct cifs_tcon *
@@ -136,10 +133,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
}
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
if (buf_to_free->password) {
memset(buf_to_free->password, 0, strlen(buf_to_free->password));
kfree(buf_to_free->password);
}
kzfree(buf_to_free->password);
kfree(buf_to_free);
}
......
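These hunks implement "CIFS: zero sensitive data when freeing": kzfree() wipes the entire allocation before freeing it, which both shortens the call sites and closes a gap in the old strlen()-based memset, which never cleared bytes past the first NUL. Roughly what kzfree() does, simplified from mm/slab_common.c:

void kzfree(const void *p)
{
	size_t ks;

	if (unlikely(ZERO_OR_NULL_PTR(p)))
		return;
	ks = ksize(p);			/* full allocated size, not strlen() */
	memset((void *)p, 0, ks);	/* wipe before returning to the allocator */
	kfree(p);
}

Note the session struct itself is now kzfree()d as well, since it can hold key material.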
...@@ -87,9 +87,11 @@ cifs_read_data_offset(char *buf) ...@@ -87,9 +87,11 @@ cifs_read_data_offset(char *buf)
} }
static unsigned int static unsigned int
cifs_read_data_length(char *buf) cifs_read_data_length(char *buf, bool in_remaining)
{ {
READ_RSP *rsp = (READ_RSP *)buf; READ_RSP *rsp = (READ_RSP *)buf;
/* It's a bug to request remaining data for SMB1 packets */
WARN_ON(in_remaining);
return (le16_to_cpu(rsp->DataLengthHigh) << 16) + return (le16_to_cpu(rsp->DataLengthHigh) << 16) +
le16_to_cpu(rsp->DataLength); le16_to_cpu(rsp->DataLength);
} }
......
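The in_remaining flag threads down from the read-completion path: both the SMB1 callback above and its SMB2 counterpart further down now take it, which implies the smb_version_operations hook in cifsglob.h changes shape as well. Presumably:

	/* given a pointer to an SMB read response, return the data length */
	unsigned int (*read_data_length)(char *buf, bool in_remaining);

For SMB1 the flag can never legitimately be set, hence the WARN_ON.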
...@@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, ...@@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
nr_ioctl_req.Reserved = 0; nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY,
true /* is_fsctl */, false /* use_ipc */, true /* is_fsctl */,
(char *)&nr_ioctl_req, sizeof(nr_ioctl_req), (char *)&nr_ioctl_req, sizeof(nr_ioctl_req),
NULL, NULL /* no return info */); NULL, NULL /* no return info */);
if (rc == -EOPNOTSUPP) { if (rc == -EOPNOTSUPP) {
......
...@@ -578,7 +578,7 @@ smb2_is_valid_lease_break(char *buffer) ...@@ -578,7 +578,7 @@ smb2_is_valid_lease_break(char *buffer)
bool bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{ {
struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer;
struct list_head *tmp, *tmp1, *tmp2; struct list_head *tmp, *tmp1, *tmp2;
struct cifs_ses *ses; struct cifs_ses *ses;
struct cifs_tcon *tcon; struct cifs_tcon *tcon;
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "smb2status.h" #include "smb2status.h"
#include "smb2glob.h" #include "smb2glob.h"
#include "cifs_ioctl.h" #include "cifs_ioctl.h"
#include "smbdirect.h"
static int static int
change_conf(struct TCP_Server_Info *server) change_conf(struct TCP_Server_Info *server)
...@@ -250,7 +251,11 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) ...@@ -250,7 +251,11 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */ /* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write); wsize = min_t(unsigned int, wsize, server->max_write);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma)
wsize = min_t(unsigned int,
wsize, server->smbd_conn->max_readwrite_size);
#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
...@@ -266,6 +271,11 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) ...@@ -266,6 +271,11 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */ /* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read); rsize = min_t(unsigned int, rsize, server->max_read);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma)
rsize = min_t(unsigned int,
rsize, server->smbd_conn->max_readwrite_size);
#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
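On an RDMA mount the connection's negotiated max_readwrite_size now joins the same min() chain that already clamps to the server's max_write/max_read. A worked example with hypothetical numbers:

	/* wsize=8MB requested, server max_write=8MB, but the SMB Direct
	 * connection negotiated max_readwrite_size=1MB */
	wsize = min_t(unsigned int, 8388608, 8388608);	/* server cap: 8MB */
	wsize = min_t(unsigned int, 8388608, 1048576);	/* RDMA cap wins: 1MB */

The large-MTU and SMB2_MAX_BUFFER_SIZE clamps then apply on top, exactly as before.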
...@@ -283,7 +293,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -283,7 +293,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
false /* use_ipc */,
NULL /* no data input */, 0 /* no data input */, NULL /* no data input */, 0 /* no data input */,
(char **)&out_buf, &ret_data_len); (char **)&out_buf, &ret_data_len);
if (rc != 0) if (rc != 0)
...@@ -782,7 +791,6 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -782,7 +791,6 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
false /* use_ipc */,
NULL, 0 /* no input */, NULL, 0 /* no input */,
(char **)&res_key, &ret_data_len); (char **)&res_key, &ret_data_len);
...@@ -848,8 +856,7 @@ smb2_copychunk_range(const unsigned int xid, ...@@ -848,8 +856,7 @@ smb2_copychunk_range(const unsigned int xid,
/* Request server copy to target from src identified by key */ /* Request server copy to target from src identified by key */
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, false /* use_ipc */, true /* is_fsctl */, (char *)pcchunk,
(char *)pcchunk,
sizeof(struct copychunk_ioctl), (char **)&retbuf, sizeof(struct copychunk_ioctl), (char **)&retbuf,
&ret_data_len); &ret_data_len);
if (rc == 0) { if (rc == 0) {
...@@ -947,9 +954,13 @@ smb2_read_data_offset(char *buf) ...@@ -947,9 +954,13 @@ smb2_read_data_offset(char *buf)
} }
static unsigned int static unsigned int
smb2_read_data_length(char *buf) smb2_read_data_length(char *buf, bool in_remaining)
{ {
struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
if (in_remaining)
return le32_to_cpu(rsp->DataRemaining);
return le32_to_cpu(rsp->DataLength); return le32_to_cpu(rsp->DataLength);
} }
...@@ -1006,7 +1017,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -1006,7 +1017,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_SPARSE, cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
true /* is_fctl */, false /* use_ipc */, true /* is_fctl */,
&setsparse, 1, NULL, NULL); &setsparse, 1, NULL, NULL);
if (rc) { if (rc) {
tcon->broken_sparse_sup = true; tcon->broken_sparse_sup = true;
...@@ -1077,7 +1088,7 @@ smb2_duplicate_extents(const unsigned int xid, ...@@ -1077,7 +1088,7 @@ smb2_duplicate_extents(const unsigned int xid,
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, trgtfile->fid.volatile_fid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE, FSCTL_DUPLICATE_EXTENTS_TO_FILE,
true /* is_fsctl */, false /* use_ipc */, true /* is_fsctl */,
(char *)&dup_ext_buf, (char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file), sizeof(struct duplicate_extents_to_file),
NULL, NULL,
...@@ -1112,7 +1123,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -1112,7 +1123,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->fid.volatile_fid,
FSCTL_SET_INTEGRITY_INFORMATION, FSCTL_SET_INTEGRITY_INFORMATION,
true /* is_fsctl */, false /* use_ipc */, true /* is_fsctl */,
(char *)&integr_info, (char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req), sizeof(struct fsctl_set_integrity_information_req),
NULL, NULL,
...@@ -1132,7 +1143,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -1132,7 +1143,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->fid.volatile_fid,
FSCTL_SRV_ENUMERATE_SNAPSHOTS, FSCTL_SRV_ENUMERATE_SNAPSHOTS,
true /* is_fsctl */, false /* use_ipc */, true /* is_fsctl */,
NULL, 0 /* no input data */, NULL, 0 /* no input data */,
(char **)&retbuf, (char **)&retbuf,
&ret_data_len); &ret_data_len);
...@@ -1351,16 +1362,20 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, ...@@ -1351,16 +1362,20 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name); cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name);
/* /*
* Use any tcon from the current session. Here, the first one. * Try to use the IPC tcon, otherwise just use any tcon
*/ */
tcon = ses->tcon_ipc;
if (tcon == NULL) {
spin_lock(&cifs_tcp_ses_lock); spin_lock(&cifs_tcp_ses_lock);
tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, tcon = list_first_entry_or_null(&ses->tcon_list,
struct cifs_tcon,
tcon_list); tcon_list);
if (tcon) if (tcon)
tcon->tc_count++; tcon->tc_count++;
spin_unlock(&cifs_tcp_ses_lock); spin_unlock(&cifs_tcp_ses_lock);
}
if (!tcon) { if (tcon == NULL) {
cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n", cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n",
ses); ses);
rc = -ENOTCONN; rc = -ENOTCONN;
...@@ -1389,20 +1404,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, ...@@ -1389,20 +1404,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len); memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len);
do { do {
/* try first with IPC */
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_DFS_GET_REFERRALS, FSCTL_DFS_GET_REFERRALS,
true /* is_fsctl */, true /* use_ipc */, true /* is_fsctl */,
(char *)dfs_req, dfs_req_size, (char *)dfs_req, dfs_req_size,
(char **)&dfs_rsp, &dfs_rsp_size); (char **)&dfs_rsp, &dfs_rsp_size);
if (rc == -ENOTCONN) {
/* try with normal tcon */
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_DFS_GET_REFERRALS,
true /* is_fsctl */, false /*use_ipc*/,
(char *)dfs_req, dfs_req_size,
(char **)&dfs_rsp, &dfs_rsp_size);
}
} while (rc == -EAGAIN); } while (rc == -EAGAIN);
if (rc) { if (rc) {
...@@ -1421,7 +1427,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, ...@@ -1421,7 +1427,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
} }
out: out:
if (tcon) { if (tcon && !tcon->ipc) {
/* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock); spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--; tcon->tc_count--;
spin_unlock(&cifs_tcp_ses_lock); spin_unlock(&cifs_tcp_ses_lock);
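The new comment is load-bearing: ses->tcon_ipc is created during session setup and torn down with the session (see "CIFS: document tcon/ses/server refcount dance"), so it is deliberately excluded from the pinning that list-sourced tcons get. For contrast, the pin/unpin pattern used above when the tcon comes off ses->tcon_list:

	spin_lock(&cifs_tcp_ses_lock);
	tcon->tc_count++;		/* pin while the ioctl is in flight */
	spin_unlock(&cifs_tcp_ses_lock);
	/* ... FSCTL_DFS_GET_REFERRALS ... */
	spin_lock(&cifs_tcp_ses_lock);
	tcon->tc_count--;		/* drop the reference taken above */
	spin_unlock(&cifs_tcp_ses_lock);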
...@@ -1713,8 +1720,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, ...@@ -1713,8 +1720,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
true /* is_fctl */, false /* use_ipc */, true /* is_fctl */, (char *)&fsctl_buf,
(char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL); sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid); free_xid(xid);
return rc; return rc;
...@@ -1748,8 +1754,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, ...@@ -1748,8 +1754,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
true /* is_fctl */, false /* use_ipc */, true /* is_fctl */, (char *)&fsctl_buf,
(char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL); sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid); free_xid(xid);
return rc; return rc;
...@@ -2411,6 +2416,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, ...@@ -2411,6 +2416,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
struct iov_iter iter; struct iov_iter iter;
struct kvec iov; struct kvec iov;
int length; int length;
bool use_rdma_mr = false;
if (shdr->Command != SMB2_READ) { if (shdr->Command != SMB2_READ) {
cifs_dbg(VFS, "only big read responses are supported\n"); cifs_dbg(VFS, "only big read responses are supported\n");
...@@ -2437,7 +2443,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, ...@@ -2437,7 +2443,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
} }
data_offset = server->ops->read_data_offset(buf) + 4; data_offset = server->ops->read_data_offset(buf) + 4;
data_len = server->ops->read_data_length(buf); #ifdef CONFIG_CIFS_SMB_DIRECT
use_rdma_mr = rdata->mr;
#endif
data_len = server->ops->read_data_length(buf, use_rdma_mr);
if (data_offset < server->vals->read_rsp_size) { if (data_offset < server->vals->read_rsp_size) {
/* /*
......
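When the read completed over SMB Direct, the payload never traversed the socket: the server RDMA-wrote it straight into the registered pages, so the response's DataLength does not describe the returned data. The count is read from DataRemaining instead, which is what the use_rdma_mr flag selects. A hypothetical 64KB SMB Direct read:

	/*
	 * assumed response contents after the server RDMA-writes 64KB
	 * directly into rdata's registered pages:
	 *
	 *   rsp->DataLength    -> bytes following the header on the wire
	 *   rsp->DataRemaining -> 65536, the bytes placed via RDMA write
	 */
	data_len = server->ops->read_data_length(buf, true);	/* 65536 */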
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include "smb2glob.h" #include "smb2glob.h"
#include "cifspdu.h" #include "cifspdu.h"
#include "cifs_spnego.h" #include "cifs_spnego.h"
#include "smbdirect.h"
/* /*
* The following table defines the expected "StructureSize" of SMB2 requests * The following table defines the expected "StructureSize" of SMB2 requests
...@@ -319,54 +320,16 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf, ...@@ -319,54 +320,16 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf,
*total_len = parmsize + sizeof(struct smb2_sync_hdr); *total_len = parmsize + sizeof(struct smb2_sync_hdr);
} }
/* init request without RFC1001 length at the beginning */
static int
smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
void **request_buf, unsigned int *total_len)
{
int rc;
struct smb2_sync_hdr *shdr;
rc = smb2_reconnect(smb2_command, tcon);
if (rc)
return rc;
/* BB eventually switch this to SMB2 specific small buf size */
*request_buf = cifs_small_buf_get();
if (*request_buf == NULL) {
/* BB should we add a retry in here if not a writepage? */
return -ENOMEM;
}
shdr = (struct smb2_sync_hdr *)(*request_buf);
fill_small_buf(smb2_command, tcon, shdr, total_len);
if (tcon != NULL) {
#ifdef CONFIG_CIFS_STATS2
uint16_t com_code = le16_to_cpu(smb2_command);
cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
#endif
cifs_stats_inc(&tcon->num_smbs_sent);
}
return rc;
}
/* /*
* Allocate and return pointer to an SMB request hdr, and set basic * Allocate and return pointer to an SMB request hdr, and set basic
* SMB information in the SMB header. If the return code is zero, this * SMB information in the SMB header. If the return code is zero, this
* function must have filled in request_buf pointer. The returned buffer * function must have filled in request_buf pointer.
* has RFC1001 length at the beginning.
*/ */
static int static int
small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
void **request_buf) void **request_buf, unsigned int *total_len)
{ {
int rc; int rc;
unsigned int total_len;
struct smb2_pdu *pdu;
rc = smb2_reconnect(smb2_command, tcon); rc = smb2_reconnect(smb2_command, tcon);
if (rc) if (rc)
...@@ -379,12 +342,9 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, ...@@ -379,12 +342,9 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
return -ENOMEM; return -ENOMEM;
} }
pdu = (struct smb2_pdu *)(*request_buf); fill_small_buf(smb2_command, tcon,
(struct smb2_sync_hdr *)(*request_buf),
fill_small_buf(smb2_command, tcon, get_sync_hdr(pdu), &total_len); total_len);
/* Note this is only network field converted to big endian */
pdu->hdr.smb2_buf_length = cpu_to_be32(total_len);
if (tcon != NULL) { if (tcon != NULL) {
#ifdef CONFIG_CIFS_STATS2 #ifdef CONFIG_CIFS_STATS2
...@@ -398,8 +358,8 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, ...@@ -398,8 +358,8 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
} }
#ifdef CONFIG_CIFS_SMB311 #ifdef CONFIG_CIFS_SMB311
/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */ /* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */ #define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) #define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
...@@ -427,23 +387,25 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt) ...@@ -427,23 +387,25 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
} }
static void static void
assemble_neg_contexts(struct smb2_negotiate_req *req) assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{ {
char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
/* +4 is to account for the RFC1001 len field */
char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4;
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt); build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
/* Add 2 to size to round to 8 byte boundary */ /* Add 2 to size to round to 8 byte boundary */
pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context); pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(2); req->NegotiateContextCount = cpu_to_le16(2);
inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
+ sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ *total_len += 4 + sizeof(struct smb2_preauth_neg_context)
+ sizeof(struct smb2_encryption_neg_context);
} }
#else #else
static void assemble_neg_contexts(struct smb2_negotiate_req *req) static void assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{ {
return; return;
} }
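This hunk is representative of the series-wide buffer rework: request buffers no longer start with the 4-byte RFC1001 length, so the "- 4"/"+ 4" offset arithmetic and every inc_rfc1001_len() call disappear, replaced by an explicit total_len threaded through the builders. Conceptually:

/*
 * before:  [ 4-byte RFC1001 len | SMB2 header | body ]
 *          offsets computed with "- 4"; appended bytes tracked via
 *          inc_rfc1001_len() against the in-buffer length field
 *
 * after:   [ SMB2 header | body ]
 *          length carried in total_len; the TCP transport prepends the
 *          4-byte marker at send time, and SMB Direct omits it entirely,
 *          since RDMA transport has no RFC1001 framing
 */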
...@@ -477,6 +439,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) ...@@ -477,6 +439,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
int blob_offset, blob_length; int blob_offset, blob_length;
char *security_blob; char *security_blob;
int flags = CIFS_NEG_OP; int flags = CIFS_NEG_OP;
unsigned int total_len;
cifs_dbg(FYI, "Negotiate protocol\n"); cifs_dbg(FYI, "Negotiate protocol\n");
...@@ -485,30 +448,30 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) ...@@ -485,30 +448,30 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
return -EIO; return -EIO;
} }
rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); rc = smb2_plain_req_init(SMB2_NEGOTIATE, NULL, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
req->hdr.sync_hdr.SessionId = 0; req->sync_hdr.SessionId = 0;
if (strcmp(ses->server->vals->version_string, if (strcmp(ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) { SMB3ANY_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
req->DialectCount = cpu_to_le16(2); req->DialectCount = cpu_to_le16(2);
inc_rfc1001_len(req, 4); total_len += 4;
} else if (strcmp(ses->server->vals->version_string, } else if (strcmp(ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) { SMBDEFAULT_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
req->DialectCount = cpu_to_le16(3); req->DialectCount = cpu_to_le16(3);
inc_rfc1001_len(req, 6); total_len += 6;
} else { } else {
/* otherwise send specific dialect */ /* otherwise send specific dialect */
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
req->DialectCount = cpu_to_le16(1); req->DialectCount = cpu_to_le16(1);
inc_rfc1001_len(req, 2); total_len += 2;
} }
/* only one of SMB2 signing flags may be set in SMB2 request */ /* only one of SMB2 signing flags may be set in SMB2 request */
...@@ -528,13 +491,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) ...@@ -528,13 +491,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
memcpy(req->ClientGUID, server->client_guid, memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE); SMB2_CLIENT_GUID_SIZE);
if (ses->server->vals->protocol_id == SMB311_PROT_ID) if (ses->server->vals->protocol_id == SMB311_PROT_ID)
assemble_neg_contexts(req); assemble_neg_contexts(req, &total_len);
} }
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */ iov[0].iov_len = total_len;
iov[0].iov_len = get_rfc1002_length(req) + 4;
rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base; rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base;
/* /*
...@@ -654,6 +616,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -654,6 +616,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
cifs_dbg(FYI, "validate negotiate\n"); cifs_dbg(FYI, "validate negotiate\n");
#ifdef CONFIG_CIFS_SMB_DIRECT
if (tcon->ses->server->rdma)
return 0;
#endif
/* /*
* validation ioctl must be signed, so no point sending this if we * validation ioctl must be signed, so no point sending this if we
* can not sign it (ie are not known user). Even if signing is not * can not sign it (ie are not known user). Even if signing is not
...@@ -713,7 +680,6 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -713,7 +680,6 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
false /* use_ipc */,
(char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
(char **)&pneg_rsp, &rsplen); (char **)&pneg_rsp, &rsplen);
...@@ -733,8 +699,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -733,8 +699,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
} }
/* check validate negotiate info response matches what we got earlier */ /* check validate negotiate info response matches what we got earlier */
if (pneg_rsp->Dialect != if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect))
cpu_to_le16(tcon->ses->server->vals->protocol_id))
goto vneg_out; goto vneg_out;
if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
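With multi-dialect negotiation (SMB3ANY/SMBDEFAULT), vals->protocol_id describes the mount's wildcard entry rather than what the server actually chose, so validation has to compare against the dialect recorded on the server struct. Illustrative, with assumed values:

	/*
	 * mount offers 2.1 + 3.0 + 3.02 and the server picks 3.02:
	 *
	 *   ses->server->vals->protocol_id -> the multi-dialect pseudo-entry
	 *   ses->server->dialect           -> 0x0302, saved from the
	 *                                     negotiate response
	 */
	if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect))
		goto vneg_out;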
...@@ -806,20 +771,22 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) ...@@ -806,20 +771,22 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses; struct cifs_ses *ses = sess_data->ses;
struct smb2_sess_setup_req *req; struct smb2_sess_setup_req *req;
struct TCP_Server_Info *server = ses->server; struct TCP_Server_Info *server = ses->server;
unsigned int total_len;
rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req); rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, (void **) &req,
&total_len);
if (rc) if (rc)
return rc; return rc;
/* First session, not a reauthenticate */ /* First session, not a reauthenticate */
req->hdr.sync_hdr.SessionId = 0; req->sync_hdr.SessionId = 0;
/* if reconnect, we need to send previous sess id, otherwise it is 0 */ /* if reconnect, we need to send previous sess id, otherwise it is 0 */
req->PreviousSessionId = sess_data->previous_session; req->PreviousSessionId = sess_data->previous_session;
req->Flags = 0; /* MBZ */ req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */ /* to enable echos and oplocks */
req->hdr.sync_hdr.CreditRequest = cpu_to_le16(3); req->sync_hdr.CreditRequest = cpu_to_le16(3);
/* only one of SMB2 signing flags may be set in SMB2 request */ /* only one of SMB2 signing flags may be set in SMB2 request */
if (server->sign) if (server->sign)
...@@ -833,8 +800,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) ...@@ -833,8 +800,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
req->Channel = 0; /* MBZ */ req->Channel = 0; /* MBZ */
sess_data->iov[0].iov_base = (char *)req; sess_data->iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field and 1 for pad */ /* 1 for pad */
sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; sess_data->iov[0].iov_len = total_len - 1;
/* /*
* This variable will be used to clear the buffer * This variable will be used to clear the buffer
* allocated above in case of any error in the calling function. * allocated above in case of any error in the calling function.
...@@ -860,15 +827,12 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) ...@@ -860,15 +827,12 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
/* Testing shows that buffer offset must be at location of Buffer[0] */ /* Testing shows that buffer offset must be at location of Buffer[0] */
req->SecurityBufferOffset = req->SecurityBufferOffset =
cpu_to_le16(sizeof(struct smb2_sess_setup_req) - cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */);
1 /* pad */ - 4 /* rfc1001 len */);
req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len); req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */);
/* BB add code to build os and lm fields */ /* BB add code to build os and lm fields */
rc = SendReceive2(sess_data->xid, sess_data->ses, rc = smb2_send_recv(sess_data->xid, sess_data->ses,
sess_data->iov, 2, sess_data->iov, 2,
&sess_data->buf0_type, &sess_data->buf0_type,
CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov); CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
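smb2_send_recv() is the new transport entry point used throughout these hunks; unlike SendReceive2(), its iovecs begin at the SMB2 header, with no RFC1001 marker for the caller to account for. Its declaration, inferred from the call sites (smb2proto.h):

extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *ses,
			  struct kvec *iov, int n_vec,
			  int *resp_buf_type /* ret */, const int flags,
			  struct kvec *resp_iov);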
...@@ -1092,7 +1056,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) ...@@ -1092,7 +1056,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
goto out; goto out;
req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base; req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
req->hdr.sync_hdr.SessionId = ses->Suid; req->sync_hdr.SessionId = ses->Suid;
rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
sess_data->nls_cp); sess_data->nls_cp);
...@@ -1202,6 +1166,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) ...@@ -1202,6 +1166,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
int rc = 0; int rc = 0;
struct TCP_Server_Info *server; struct TCP_Server_Info *server;
int flags = 0; int flags = 0;
unsigned int total_len;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
cifs_dbg(FYI, "disconnect session %p\n", ses); cifs_dbg(FYI, "disconnect session %p\n", ses);
...@@ -1214,19 +1182,24 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) ...@@ -1214,19 +1182,24 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
if (ses->need_reconnect) if (ses->need_reconnect)
goto smb2_session_already_dead; goto smb2_session_already_dead;
rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req); rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
/* since no tcon, smb2_init can not do this, so do here */ /* since no tcon, smb2_init can not do this, so do here */
req->hdr.sync_hdr.SessionId = ses->Suid; req->sync_hdr.SessionId = ses->Suid;
if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
flags |= CIFS_TRANSFORM_REQ; flags |= CIFS_TRANSFORM_REQ;
else if (server->sign) else if (server->sign)
req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
flags |= CIFS_NO_RESP;
rc = SendReceiveNoRsp(xid, ses, (char *) req, flags); iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
/* /*
* No tcon so can't do * No tcon so can't do
...@@ -1265,6 +1238,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, ...@@ -1265,6 +1238,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
int unc_path_len; int unc_path_len;
__le16 *unc_path = NULL; __le16 *unc_path = NULL;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "TCON\n"); cifs_dbg(FYI, "TCON\n");
...@@ -1283,40 +1257,30 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, ...@@ -1283,40 +1257,30 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
} }
/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */ /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
if (tcon)
tcon->tid = 0; tcon->tid = 0;
rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req,
&total_len);
if (rc) { if (rc) {
kfree(unc_path); kfree(unc_path);
return rc; return rc;
} }
if (tcon == NULL) { if (encryption_required(tcon))
if ((ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA))
flags |= CIFS_TRANSFORM_REQ;
/* since no tcon, smb2_init can not do this, so do here */
req->hdr.sync_hdr.SessionId = ses->Suid;
if (ses->server->sign)
req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
} else if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ; flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field and 1 for pad */ /* 1 for pad */
iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; iov[0].iov_len = total_len - 1;
/* Testing shows that buffer offset must be at location of Buffer[0] */ /* Testing shows that buffer offset must be at location of Buffer[0] */
req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req) req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req)
- 1 /* pad */ - 4 /* do not count rfc1001 len field */); - 1 /* pad */);
req->PathLength = cpu_to_le16(unc_path_len - 2); req->PathLength = cpu_to_le16(unc_path_len - 2);
iov[1].iov_base = unc_path; iov[1].iov_base = unc_path;
iov[1].iov_len = unc_path_len; iov[1].iov_len = unc_path_len;
inc_rfc1001_len(req, unc_path_len - 1 /* pad */); rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base; rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
...@@ -1328,21 +1292,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, ...@@ -1328,21 +1292,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
goto tcon_error_exit; goto tcon_error_exit;
} }
if (tcon == NULL) {
ses->ipc_tid = rsp->hdr.sync_hdr.TreeId;
goto tcon_exit;
}
switch (rsp->ShareType) { switch (rsp->ShareType) {
case SMB2_SHARE_TYPE_DISK: case SMB2_SHARE_TYPE_DISK:
cifs_dbg(FYI, "connection to disk share\n"); cifs_dbg(FYI, "connection to disk share\n");
break; break;
case SMB2_SHARE_TYPE_PIPE: case SMB2_SHARE_TYPE_PIPE:
tcon->ipc = true; tcon->pipe = true;
cifs_dbg(FYI, "connection to pipe share\n"); cifs_dbg(FYI, "connection to pipe share\n");
break; break;
case SMB2_SHARE_TYPE_PRINT: case SMB2_SHARE_TYPE_PRINT:
tcon->ipc = true; tcon->print = true;
cifs_dbg(FYI, "connection to printer\n"); cifs_dbg(FYI, "connection to printer\n");
break; break;
default: default:
...@@ -1389,6 +1348,10 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -1389,6 +1348,10 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
int rc = 0; int rc = 0;
struct cifs_ses *ses = tcon->ses; struct cifs_ses *ses = tcon->ses;
int flags = 0; int flags = 0;
unsigned int total_len;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
cifs_dbg(FYI, "Tree Disconnect\n"); cifs_dbg(FYI, "Tree Disconnect\n");
...@@ -1398,14 +1361,20 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) ...@@ -1398,14 +1361,20 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
return 0; return 0;
rc = small_smb2_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req,
&total_len);
if (rc) if (rc)
return rc; return rc;
if (encryption_required(tcon)) if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ; flags |= CIFS_TRANSFORM_REQ;
rc = SendReceiveNoRsp(xid, ses, (char *)req, flags); flags |= CIFS_NO_RESP;
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
if (rc) if (rc)
cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE); cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE);
...@@ -1505,11 +1474,10 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, ...@@ -1505,11 +1474,10 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
if (!req->CreateContextsOffset) if (!req->CreateContextsOffset)
req->CreateContextsOffset = cpu_to_le32( req->CreateContextsOffset = cpu_to_le32(
sizeof(struct smb2_create_req) - 4 + sizeof(struct smb2_create_req) +
iov[num - 1].iov_len); iov[num - 1].iov_len);
le32_add_cpu(&req->CreateContextsLength, le32_add_cpu(&req->CreateContextsLength,
server->vals->create_lease_size); server->vals->create_lease_size);
inc_rfc1001_len(&req->hdr, server->vals->create_lease_size);
*num_iovec = num + 1; *num_iovec = num + 1;
return 0; return 0;
} }
...@@ -1589,10 +1557,9 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, ...@@ -1589,10 +1557,9 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable_v2); iov[num].iov_len = sizeof(struct create_durable_v2);
if (!req->CreateContextsOffset) if (!req->CreateContextsOffset)
req->CreateContextsOffset = req->CreateContextsOffset =
cpu_to_le32(sizeof(struct smb2_create_req) - 4 + cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len); iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2)); le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2));
inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2));
*num_iovec = num + 1; *num_iovec = num + 1;
return 0; return 0;
} }
...@@ -1613,12 +1580,10 @@ add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec, ...@@ -1613,12 +1580,10 @@ add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2); iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2);
if (!req->CreateContextsOffset) if (!req->CreateContextsOffset)
req->CreateContextsOffset = req->CreateContextsOffset =
cpu_to_le32(sizeof(struct smb2_create_req) - 4 + cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len); iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength, le32_add_cpu(&req->CreateContextsLength,
sizeof(struct create_durable_handle_reconnect_v2)); sizeof(struct create_durable_handle_reconnect_v2));
inc_rfc1001_len(&req->hdr,
sizeof(struct create_durable_handle_reconnect_v2));
*num_iovec = num + 1; *num_iovec = num + 1;
return 0; return 0;
} }
...@@ -1649,10 +1614,9 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, ...@@ -1649,10 +1614,9 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable); iov[num].iov_len = sizeof(struct create_durable);
if (!req->CreateContextsOffset) if (!req->CreateContextsOffset)
req->CreateContextsOffset = req->CreateContextsOffset =
cpu_to_le32(sizeof(struct smb2_create_req) - 4 + cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len); iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable)); le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable));
inc_rfc1001_len(&req->hdr, sizeof(struct create_durable));
*num_iovec = num + 1; *num_iovec = num + 1;
return 0; return 0;
} }
...@@ -1723,6 +1687,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1723,6 +1687,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
__u32 file_attributes = 0; __u32 file_attributes = 0;
char *dhc_buf = NULL, *lc_buf = NULL; char *dhc_buf = NULL, *lc_buf = NULL;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "create/open\n"); cifs_dbg(FYI, "create/open\n");
...@@ -1731,7 +1696,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1731,7 +1696,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
else else
return -EIO; return -EIO;
rc = small_smb2_init(SMB2_CREATE, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
...@@ -1752,12 +1718,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1752,12 +1718,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
/* -1 since last byte is buf[0] which is sent below (path) */ /* -1 since last byte is buf[0] which is sent below (path) */
iov[0].iov_len--; iov[0].iov_len = total_len - 1;
req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
/* [MS-SMB2] 2.2.13 NameOffset: /* [MS-SMB2] 2.2.13 NameOffset:
* If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of
...@@ -1770,7 +1734,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1770,7 +1734,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (tcon->share_flags & SHI1005_FLAGS_DFS) { if (tcon->share_flags & SHI1005_FLAGS_DFS) {
int name_len; int name_len;
req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size, rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len, &name_len,
tcon->treeName, path); tcon->treeName, path);
...@@ -1797,8 +1761,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1797,8 +1761,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
iov[1].iov_len = uni_path_len; iov[1].iov_len = uni_path_len;
iov[1].iov_base = path; iov[1].iov_base = path;
/* -1 since last byte is buf[0] which was counted in smb2_buf_len */
inc_rfc1001_len(req, uni_path_len - 1);
if (!server->oplocks) if (!server->oplocks)
*oplock = SMB2_OPLOCK_LEVEL_NONE; *oplock = SMB2_OPLOCK_LEVEL_NONE;
...@@ -1836,7 +1798,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1836,7 +1798,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
dhc_buf = iov[n_iov-1].iov_base; dhc_buf = iov[n_iov-1].iov_base;
} }
rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
...@@ -1877,7 +1840,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ...@@ -1877,7 +1840,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
*/ */
int int
SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc, u64 volatile_fid, u32 opcode, bool is_fsctl,
char *in_data, u32 indatalen, char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */) char **out_data, u32 *plen /* returned data len */)
{ {
...@@ -1891,6 +1854,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -1891,6 +1854,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
int n_iov; int n_iov;
int rc = 0; int rc = 0;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "SMB2 IOCTL\n"); cifs_dbg(FYI, "SMB2 IOCTL\n");
...@@ -1909,20 +1873,10 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -1909,20 +1873,10 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (!ses || !(ses->server)) if (!ses || !(ses->server))
return -EIO; return -EIO;
rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
if (use_ipc) {
if (ses->ipc_tid == 0) {
cifs_small_buf_release(req);
return -ENOTCONN;
}
cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n",
req->hdr.sync_hdr.TreeId, ses->ipc_tid);
req->hdr.sync_hdr.TreeId = ses->ipc_tid;
}
if (encryption_required(tcon)) if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ; flags |= CIFS_TRANSFORM_REQ;
...@@ -1934,7 +1888,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -1934,7 +1888,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->InputCount = cpu_to_le32(indatalen); req->InputCount = cpu_to_le32(indatalen);
/* do not set InputOffset if no input data */ /* do not set InputOffset if no input data */
req->InputOffset = req->InputOffset =
cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4); cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer));
iov[1].iov_base = in_data; iov[1].iov_base = in_data;
iov[1].iov_len = indatalen; iov[1].iov_len = indatalen;
n_iov = 2; n_iov = 2;
...@@ -1969,21 +1923,20 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -1969,21 +1923,20 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
* but if input data passed to ioctl, we do not * but if input data passed to ioctl, we do not
* want to double count this, so we do not send * want to double count this, so we do not send
* the dummy one byte of data in iovec[0] if sending * the dummy one byte of data in iovec[0] if sending
* input data (in iovec[1]). We also must add 4 bytes * input data (in iovec[1]).
* in first iovec to allow for rfc1002 length field.
*/ */
if (indatalen) { if (indatalen) {
iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; iov[0].iov_len = total_len - 1;
inc_rfc1001_len(req, indatalen - 1);
} else } else
iov[0].iov_len = get_rfc1002_length(req) + 4; iov[0].iov_len = total_len;
/* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */
if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base; rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
...@@ -2052,7 +2005,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -2052,7 +2005,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */, FSCTL_SET_COMPRESSION, true /* is_fsctl */,
false /* use_ipc */,
(char *)&fsctl_input /* data input */, (char *)&fsctl_input /* data input */,
2 /* in data len */, &ret_data /* out data */, NULL); 2 /* in data len */, &ret_data /* out data */, NULL);
...@@ -2073,13 +2025,14 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -2073,13 +2025,14 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype; int resp_buftype;
int rc = 0; int rc = 0;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "Close\n"); cifs_dbg(FYI, "Close\n");
if (!ses || !(ses->server)) if (!ses || !(ses->server))
return -EIO; return -EIO;
rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
...@@ -2090,10 +2043,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -2090,10 +2043,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFileId = volatile_fid; req->VolatileFileId = volatile_fid;
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */ iov[0].iov_len = total_len;
iov[0].iov_len = get_rfc1002_length(req) + 4;
rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
...@@ -2180,13 +2132,15 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -2180,13 +2132,15 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype; int resp_buftype;
struct cifs_ses *ses = tcon->ses; struct cifs_ses *ses = tcon->ses;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "Query Info\n"); cifs_dbg(FYI, "Query Info\n");
if (!ses || !(ses->server)) if (!ses || !(ses->server))
return -EIO; return -EIO;
rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
&total_len);
if (rc) if (rc)
return rc; return rc;
...@@ -2203,15 +2157,14 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, ...@@ -2203,15 +2157,14 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
* We do not use the input buffer (do not send extra byte) * We do not use the input buffer (do not send extra byte)
*/ */
req->InputBufferOffset = 0; req->InputBufferOffset = 0;
inc_rfc1001_len(req, -1);
req->OutputBufferLength = cpu_to_le32(output_len); req->OutputBufferLength = cpu_to_le32(output_len);
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */ /* 1 for Buffer */
iov[0].iov_len = get_rfc1002_length(req) + 4; iov[0].iov_len = total_len - 1;
rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
...@@ -2338,6 +2291,10 @@ void smb2_reconnect_server(struct work_struct *work) ...@@ -2338,6 +2291,10 @@ void smb2_reconnect_server(struct work_struct *work)
tcon_exist = true; tcon_exist = true;
} }
} }
if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
tcon_exist = true;
}
} }
/* /*
* Get the reference to server struct to be sure that the last call of * Get the reference to server struct to be sure that the last call of
...@@ -2376,6 +2333,8 @@ SMB2_echo(struct TCP_Server_Info *server) ...@@ -2376,6 +2333,8 @@ SMB2_echo(struct TCP_Server_Info *server)
struct kvec iov[2]; struct kvec iov[2];
struct smb_rqst rqst = { .rq_iov = iov, struct smb_rqst rqst = { .rq_iov = iov,
.rq_nvec = 2 }; .rq_nvec = 2 };
unsigned int total_len;
__be32 rfc1002_marker;
cifs_dbg(FYI, "In echo request\n"); cifs_dbg(FYI, "In echo request\n");
...@@ -2385,17 +2344,17 @@ SMB2_echo(struct TCP_Server_Info *server) ...@@ -2385,17 +2344,17 @@ SMB2_echo(struct TCP_Server_Info *server)
return rc; return rc;
} }
rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); rc = smb2_plain_req_init(SMB2_ECHO, NULL, (void **)&req, &total_len);
if (rc) if (rc)
return rc; return rc;
req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1); req->sync_hdr.CreditRequest = cpu_to_le16(1);
/* 4 for rfc1002 length field */
iov[0].iov_len = 4; iov[0].iov_len = 4;
iov[0].iov_base = (char *)req; rfc1002_marker = cpu_to_be32(total_len);
iov[1].iov_len = get_rfc1002_length(req); iov[0].iov_base = &rfc1002_marker;
iov[1].iov_base = (char *)req + 4; iov[1].iov_len = total_len;
iov[1].iov_base = (char *)req;
rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL, rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL,
server, CIFS_ECHO_OP); server, CIFS_ECHO_OP);
...@@ -2417,13 +2376,14 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -2417,13 +2376,14 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
int resp_buftype; int resp_buftype;
int rc = 0; int rc = 0;
int flags = 0; int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "Flush\n"); cifs_dbg(FYI, "Flush\n");
if (!ses || !(ses->server)) if (!ses || !(ses->server))
return -EIO; return -EIO;
rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req); rc = smb2_plain_req_init(SMB2_FLUSH, tcon, (void **) &req, &total_len);
if (rc) if (rc)
return rc; return rc;
...@@ -2434,10 +2394,9 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -2434,10 +2394,9 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->VolatileFileId = volatile_fid; req->VolatileFileId = volatile_fid;
iov[0].iov_base = (char *)req; iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */ iov[0].iov_len = total_len;
iov[0].iov_len = get_rfc1002_length(req) + 4;
rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req); cifs_small_buf_release(req);
if (rc != 0) if (rc != 0)
...@@ -2453,18 +2412,21 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ...@@ -2453,18 +2412,21 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
*/ */
static int static int
smb2_new_read_req(void **buf, unsigned int *total_len, smb2_new_read_req(void **buf, unsigned int *total_len,
struct cifs_io_parms *io_parms, unsigned int remaining_bytes, struct cifs_io_parms *io_parms, struct cifs_readdata *rdata,
int request_type) unsigned int remaining_bytes, int request_type)
{ {
int rc = -EACCES; int rc = -EACCES;
struct smb2_read_plain_req *req = NULL; struct smb2_read_plain_req *req = NULL;
struct smb2_sync_hdr *shdr; struct smb2_sync_hdr *shdr;
struct TCP_Server_Info *server;
rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req, rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req,
total_len); total_len);
if (rc) if (rc)
return rc; return rc;
if (io_parms->tcon->ses->server == NULL)
server = io_parms->tcon->ses->server;
if (server == NULL)
return -ECONNABORTED; return -ECONNABORTED;
shdr = &req->sync_hdr; shdr = &req->sync_hdr;
...@@ -2478,7 +2440,40 @@ smb2_new_read_req(void **buf, unsigned int *total_len, ...@@ -2478,7 +2440,40 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
req->MinimumCount = 0; req->MinimumCount = 0;
req->Length = cpu_to_le32(io_parms->length); req->Length = cpu_to_le32(io_parms->length);
req->Offset = cpu_to_le64(io_parms->offset); req->Offset = cpu_to_le64(io_parms->offset);
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
* If we want to do an RDMA write, fill in and append
* smbd_buffer_descriptor_v1 to the end of the read request
*/
if (server->rdma && rdata &&
rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) {
struct smbd_buffer_descriptor_v1 *v1;
bool need_invalidate =
io_parms->tcon->ses->server->dialect == SMB30_PROT_ID;
rdata->mr = smbd_register_mr(
server->smbd_conn, rdata->pages,
rdata->nr_pages, rdata->tailsz,
true, need_invalidate);
if (!rdata->mr)
return -ENOBUFS;
req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
if (need_invalidate)
req->Channel = SMB2_CHANNEL_RDMA_V1;
req->ReadChannelInfoOffset =
cpu_to_le16(offsetof(struct smb2_read_plain_req, Buffer));
req->ReadChannelInfoLength =
cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1));
v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
v1->offset = cpu_to_le64(rdata->mr->mr->iova);
v1->token = cpu_to_le32(rdata->mr->mr->rkey);
v1->length = cpu_to_le32(rdata->mr->mr->length);
*total_len += sizeof(*v1) - 1;
}
#endif
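The appended descriptor tells the server where to RDMA-write the read payload: address, steering token, and length of the registered buffer, per [MS-SMBD]. As declared in smbdirect.h (a sketch):

struct smbd_buffer_descriptor_v1 {
	__le64 offset;	/* remote virtual address of the registered buffer */
	__le32 token;	/* rkey the peer presents for the RDMA transfer */
	__le32 length;	/* length of the registered region */
} __packed;

The Channel logic above is intentional, not inverted: 3.02 and later default to SMB2_CHANNEL_RDMA_V1_INVALIDATE so the server invalidates the registration remotely, while SMB 3.0 only defines SMB2_CHANNEL_RDMA_V1, leaving the client to invalidate locally (need_invalidate).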
if (request_type & CHAINED_REQUEST) { if (request_type & CHAINED_REQUEST) {
if (!(request_type & END_OF_CHAIN)) { if (!(request_type & END_OF_CHAIN)) {
/* next 8-byte aligned request */ /* next 8-byte aligned request */
...@@ -2557,7 +2552,17 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -2557,7 +2552,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
if (rdata->result != -ENODATA) if (rdata->result != -ENODATA)
rdata->result = -EIO; rdata->result = -EIO;
} }
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
* If this rdata has a memory registered, the MR can be freed now.
* The MR needs to be freed as soon as I/O finishes to prevent deadlock,
* because MRs are limited in number and are reused for future I/Os
*/
if (rdata->mr) {
smbd_deregister_mr(rdata->mr);
rdata->mr = NULL;
}
#endif
if (rdata->result)
cifs_stats_fail_inc(tcon, SMB2_READ_HE);
@@ -2592,7 +2597,8 @@ smb2_async_readv(struct cifs_readdata *rdata)
server = io_parms.tcon->ses->server;
-rc = smb2_new_read_req((void **) &buf, &total_len, &io_parms, 0, 0);
+rc = smb2_new_read_req(
+	(void **) &buf, &total_len, &io_parms, rdata, 0, 0);
if (rc) {
if (rc == -EAGAIN && rdata->credits) {
/* credits were reset by reconnect */
@@ -2650,31 +2656,24 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
struct smb2_sync_hdr *shdr;
-struct kvec iov[2];
+struct kvec iov[1];
struct kvec rsp_iov;
unsigned int total_len;
-__be32 req_len;
-struct smb_rqst rqst = { .rq_iov = iov,
-			 .rq_nvec = 2 };
int flags = CIFS_LOG_ERROR;
struct cifs_ses *ses = io_parms->tcon->ses;
*nbytes = 0;
-rc = smb2_new_read_req((void **)&req, &total_len, io_parms, 0, 0);
+rc = smb2_new_read_req((void **)&req, &total_len, io_parms, NULL, 0, 0);
if (rc)
return rc;
if (encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
-req_len = cpu_to_be32(total_len);
-iov[0].iov_base = &req_len;
-iov[0].iov_len = sizeof(__be32);
-iov[1].iov_base = req;
-iov[1].iov_len = total_len;
+iov[0].iov_base = (char *)req;
+iov[0].iov_len = total_len;
-rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
@@ -2755,7 +2754,19 @@ smb2_writev_callback(struct mid_q_entry *mid)
wdata->result = -EIO;
break;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
 * If this wdata has a memory registered, the MR can be freed.
 * The number of MRs available is limited; it is important to recover
 * a used MR as soon as its I/O finishes. Holding an MR through the
 * later stages of I/O can lead to deadlock on retry, when no MR is
 * left to send the request.
*/
if (wdata->mr) {
smbd_deregister_mr(wdata->mr);
wdata->mr = NULL;
}
#endif
if (wdata->result)
cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
@@ -2776,8 +2787,10 @@ smb2_async_writev(struct cifs_writedata *wdata,
struct TCP_Server_Info *server = tcon->ses->server;
struct kvec iov[2];
struct smb_rqst rqst = { };
unsigned int total_len;
__be32 rfc1002_marker;
-rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_WRITE, tcon, (void **) &req, &total_len);
if (rc) {
if (rc == -EAGAIN && wdata->credits) {
/* credits were reset by reconnect */
@@ -2793,7 +2806,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-shdr = get_sync_hdr(req);
+shdr = (struct smb2_sync_hdr *)req;
shdr->ProcessId = cpu_to_le32(wdata->cfile->pid);
req->PersistentFileId = wdata->cfile->fid.persistent_fid;
@@ -2802,16 +2815,51 @@ smb2_async_writev(struct cifs_writedata *wdata,
req->WriteChannelInfoLength = 0;
req->Channel = 0;
req->Offset = cpu_to_le64(wdata->offset);
-/* 4 for rfc1002 length field */
-req->DataOffset = cpu_to_le16(
-	offsetof(struct smb2_write_req, Buffer) - 4);
+req->DataOffset = cpu_to_le16(
+	offsetof(struct smb2_write_req, Buffer));
req->RemainingBytes = 0;
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
* If we want to do a server RDMA read, fill in and append
* smbd_buffer_descriptor_v1 to the end of write request
*/
if (server->rdma && wdata->bytes >=
server->smbd_conn->rdma_readwrite_threshold) {
struct smbd_buffer_descriptor_v1 *v1;
bool need_invalidate = server->dialect == SMB30_PROT_ID;
wdata->mr = smbd_register_mr(
server->smbd_conn, wdata->pages,
wdata->nr_pages, wdata->tailsz,
false, need_invalidate);
if (!wdata->mr) {
rc = -ENOBUFS;
goto async_writev_out;
}
req->Length = 0;
req->DataOffset = 0;
req->RemainingBytes =
cpu_to_le32((wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz);
req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
if (need_invalidate)
req->Channel = SMB2_CHANNEL_RDMA_V1;
req->WriteChannelInfoOffset =
cpu_to_le16(offsetof(struct smb2_write_req, Buffer));
req->WriteChannelInfoLength =
cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1));
v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
v1->offset = cpu_to_le64(wdata->mr->mr->iova);
v1->token = cpu_to_le32(wdata->mr->mr->rkey);
v1->length = cpu_to_le32(wdata->mr->mr->length);
}
#endif
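/*
 * A worked example of the RemainingBytes computation above: with
 * nr_pages = 3, PAGE_SIZE = 4096 and tailsz = 100 (assumed values for
 * illustration), the server is asked to RDMA-read
 * (3 - 1) * 4096 + 100 = 8292 bytes. Length and DataOffset stay 0
 * because no write data follows the packet itself.
 */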
/* 4 for rfc1002 length field and 1 for Buffer */
iov[0].iov_len = 4;
-iov[0].iov_base = req;
-iov[1].iov_len = get_rfc1002_length(req) - 1;
-iov[1].iov_base = (char *)req + 4;
+rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes);
+iov[0].iov_base = &rfc1002_marker;
+iov[1].iov_len = total_len - 1;
+iov[1].iov_base = (char *)req;
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
@@ -2819,13 +2867,22 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
rqst.rq_npages = 0;
}
#endif
cifs_dbg(FYI, "async write at %llu %u bytes\n", cifs_dbg(FYI, "async write at %llu %u bytes\n",
wdata->offset, wdata->bytes); wdata->offset, wdata->bytes);
#ifdef CONFIG_CIFS_SMB_DIRECT
/* For RDMA read, I/O size is in RemainingBytes not in Length */
if (!wdata->mr)
req->Length = cpu_to_le32(wdata->bytes); req->Length = cpu_to_le32(wdata->bytes);
#else
inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); req->Length = cpu_to_le32(wdata->bytes);
#endif
if (wdata->credits) { if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
@@ -2869,13 +2926,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
int resp_buftype;
struct kvec rsp_iov;
int flags = 0;
unsigned int total_len;
*nbytes = 0;
if (n_vec < 1)
return rc;
-rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, (void **) &req,
+	&total_len);
if (rc)
return rc;
@@ -2885,7 +2944,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
if (encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
-req->hdr.sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
+req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
req->PersistentFileId = io_parms->persistent_fid;
req->VolatileFileId = io_parms->volatile_fid;
@@ -2894,19 +2953,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
req->Channel = 0;
req->Length = cpu_to_le32(io_parms->length);
req->Offset = cpu_to_le64(io_parms->offset);
-/* 4 for rfc1002 length field */
-req->DataOffset = cpu_to_le16(
-	offsetof(struct smb2_write_req, Buffer) - 4);
+req->DataOffset = cpu_to_le16(
+	offsetof(struct smb2_write_req, Buffer));
req->RemainingBytes = 0;
iov[0].iov_base = (char *)req;
-/* 4 for rfc1002 length field and 1 for Buffer */
-iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
-/* length of entire message including data to be written */
-inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */);
+/* 1 for Buffer */
+iov[0].iov_len = total_len - 1;
-rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
-	&resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, io_parms->tcon->ses, iov, n_vec + 1,
+	&resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
@@ -2984,13 +3039,15 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int output_size = CIFSMaxBufSize;
size_t info_buf_size;
int flags = 0;
unsigned int total_len;
if (ses && (ses->server))
server = ses->server;
else
return -EIO;
-rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req,
+	&total_len);
if (rc)
return rc;
@@ -3022,7 +3079,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
memcpy(bufptr, &asteriks, len);
-req->FileNameOffset =
-	cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4);
+req->FileNameOffset =
+	cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1);
req->FileNameLength = cpu_to_le16(len);
/*
* BB could be 30 bytes or so longer if we used SMB2 specific
@@ -3033,15 +3090,13 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
req->OutputBufferLength = cpu_to_le32(output_size);
iov[0].iov_base = (char *)req;
-/* 4 for RFC1001 length and 1 for Buffer */
-iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+/* 1 for Buffer */
+iov[0].iov_len = total_len - 1;
iov[1].iov_base = (char *)(req->Buffer);
iov[1].iov_len = len;
-inc_rfc1001_len(req, len - 1 /* Buffer */);
-rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base;
@@ -3110,6 +3165,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int i;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
unsigned int total_len;
if (!ses || !(ses->server))
return -EIO;
@@ -3121,7 +3177,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!iov)
return -ENOMEM;
-rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len);
if (rc) {
kfree(iov);
return rc;
@@ -3130,7 +3186,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
+req->sync_hdr.ProcessId = cpu_to_le32(pid);
req->InfoType = info_type;
req->FileInfoClass = info_class;
@@ -3138,27 +3194,25 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFileId = volatile_fid;
req->AdditionalInformation = cpu_to_le32(additional_info);
-/* 4 for RFC1001 length and 1 for Buffer */
-req->BufferOffset =
-	cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4);
+req->BufferOffset =
+	cpu_to_le16(sizeof(struct smb2_set_info_req) - 1);
req->BufferLength = cpu_to_le32(*size);
-inc_rfc1001_len(req, *size - 1 /* Buffer */);
memcpy(req->Buffer, *data, *size);
total_len += *size;
iov[0].iov_base = (char *)req;
-/* 4 for RFC1001 length */
-iov[0].iov_len = get_rfc1002_length(req) + 4;
+/* 1 for Buffer */
+iov[0].iov_len = total_len - 1;
for (i = 1; i < num; i++) {
-inc_rfc1001_len(req, size[i]);
le32_add_cpu(&req->BufferLength, size[i]);
iov[i].iov_base = (char *)data[i];
iov[i].iov_len = size[i];
}
-rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, ses, iov, num, &resp_buftype, flags,
+	&rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
@@ -3310,11 +3364,17 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 oplock_level)
{
int rc;
-struct smb2_oplock_break *req = NULL;
+struct smb2_oplock_break_req *req = NULL;
struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
unsigned int total_len;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
cifs_dbg(FYI, "SMB2_oplock_break\n");
-rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
+	&total_len);
if (rc)
return rc;
@@ -3324,9 +3384,14 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFid = volatile_fid;
req->PersistentFid = persistent_fid;
req->OplockLevel = oplock_level;
-req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
+req->sync_hdr.CreditRequest = cpu_to_le16(1);
flags |= CIFS_NO_RESP;
-rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
@@ -3355,13 +3420,15 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
{
int rc;
struct smb2_query_info_req *req;
unsigned int total_len;
cifs_dbg(FYI, "Query FSInfo level %d\n", level);
if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
return -EIO;
-rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
+	&total_len);
if (rc)
return rc;
@@ -3369,15 +3436,14 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
req->FileInfoClass = level;
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
-/* 4 for rfc1002 length field and 1 for pad */
-req->InputBufferOffset =
-	cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
+/* 1 for pad */
+req->InputBufferOffset =
+	cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
req->OutputBufferLength = cpu_to_le32(
outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4);
iov->iov_base = (char *)req;
-/* 4 for rfc1002 length field */
-iov->iov_len = get_rfc1002_length(req) + 4;
+iov->iov_len = total_len;
return 0;
}
@@ -3403,7 +3469,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3459,7 +3525,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3505,33 +3571,32 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buf_type;
unsigned int count;
int flags = CIFS_NO_RESP;
unsigned int total_len;
cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock);
-rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_LOCK, tcon, (void **) &req, &total_len);
if (rc)
return rc;
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
+req->sync_hdr.ProcessId = cpu_to_le32(pid);
req->LockCount = cpu_to_le16(num_lock);
req->PersistentFileId = persist_fid;
req->VolatileFileId = volatile_fid;
count = num_lock * sizeof(struct smb2_lock_element);
-inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element));
iov[0].iov_base = (char *)req;
-/* 4 for rfc1002 length field and count for all locks */
-iov[0].iov_len = get_rfc1002_length(req) + 4 - count;
+iov[0].iov_len = total_len - sizeof(struct smb2_lock_element);
iov[1].iov_base = (char *)buf;
iov[1].iov_len = count;
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
-rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
-	&rsp_iov);
+rc = smb2_send_recv(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
+	&rsp_iov);
cifs_small_buf_release(req);
if (rc) {
@@ -3565,24 +3630,35 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
struct smb2_lease_ack *req = NULL;
struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
unsigned int total_len;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
cifs_dbg(FYI, "SMB2_lease_break\n");
-rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
+	&total_len);
if (rc)
return rc;
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
+req->sync_hdr.CreditRequest = cpu_to_le16(1);
req->StructureSize = cpu_to_le16(36);
-inc_rfc1001_len(req, 12);
+total_len += 12;
memcpy(req->LeaseKey, lease_key, 16);
req->LeaseState = lease_state;
-rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+flags |= CIFS_NO_RESP;
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
......
@@ -195,7 +195,7 @@ struct smb2_symlink_err_rsp {
#define SMB2_CLIENT_GUID_SIZE 16
struct smb2_negotiate_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 36 */
__le16 DialectCount;
__le16 SecurityMode;
@@ -282,7 +282,7 @@ struct smb2_negotiate_rsp {
#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
struct smb2_sess_setup_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 25 */
__u8 Flags;
__u8 SecurityMode;
@@ -308,7 +308,7 @@ struct smb2_sess_setup_rsp {
} __packed;
struct smb2_logoff_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -323,7 +323,7 @@ struct smb2_logoff_rsp {
#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
struct smb2_tree_connect_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 Reserved; /* Flags in SMB3.1.1 */
__le16 PathOffset;
@@ -375,7 +375,7 @@ struct smb2_tree_connect_rsp {
#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */
struct smb2_tree_disconnect_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -496,7 +496,7 @@ struct smb2_tree_disconnect_rsp {
#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9
struct smb2_create_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u8 SecurityFlags;
__u8 RequestedOplockLevel;
@@ -753,7 +753,7 @@ struct duplicate_extents_to_file {
} __packed;
struct smb2_ioctl_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u16 Reserved;
__le32 CtlCode;
@@ -789,7 +789,7 @@ struct smb2_ioctl_rsp {
/* Currently defined values for close flags */
#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
struct smb2_close_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__le16 Flags;
__le32 Reserved;
@@ -812,7 +812,7 @@ struct smb2_close_rsp {
} __packed;
struct smb2_flush_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__le16 Reserved1;
__le32 Reserved2;
@@ -830,9 +830,9 @@ struct smb2_flush_rsp {
#define SMB2_READFLAG_READ_UNBUFFERED 0x01
/* Channel field for read and write: exactly one of following flags can be set*/
-#define SMB2_CHANNEL_NONE 0x00000000
-#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */
-#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */
+#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000)
+#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) /* SMB3 or later */
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) /* >= SMB3.02 */
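/*
 * A note on the change above: folding cpu_to_le32() into the channel
 * constants lets assignments and comparisons use them directly, e.g.
 *
 *	req->Channel = SMB2_CHANNEL_RDMA_V1;
 *	if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) ...
 *
 * compile to plain constant stores/compares on both big- and
 * little-endian hosts, and sparse stops warning about mixing a __le32
 * field with a plain integer constant.
 */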
/* SMB2 read request without RFC1001 length at the beginning */
struct smb2_read_plain_req {
@@ -847,8 +847,8 @@ struct smb2_read_plain_req {
__le32 MinimumCount;
__le32 Channel; /* MBZ except for SMB3 or later */
__le32 RemainingBytes;
-__le16 ReadChannelInfoOffset; /* Reserved MBZ */
-__le16 ReadChannelInfoLength; /* Reserved MBZ */
+__le16 ReadChannelInfoOffset;
+__le16 ReadChannelInfoLength;
__u8 Buffer[1];
} __packed;
@@ -868,7 +868,7 @@ struct smb2_read_rsp {
#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */
struct smb2_write_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 49 */
__le16 DataOffset; /* offset from start of SMB2 header to write data */
__le32 Length;
@@ -877,8 +877,8 @@ struct smb2_write_req {
__u64 VolatileFileId; /* opaque endianness */
__le32 Channel; /* Reserved MBZ */
__le32 RemainingBytes;
-__le16 WriteChannelInfoOffset; /* Reserved MBZ */
-__le16 WriteChannelInfoLength; /* Reserved MBZ */
+__le16 WriteChannelInfoOffset;
+__le16 WriteChannelInfoLength;
__le32 Flags;
__u8 Buffer[1];
} __packed;
@@ -907,7 +907,7 @@ struct smb2_lock_element {
} __packed;
struct smb2_lock_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 48 */
__le16 LockCount;
__le32 Reserved;
@@ -924,7 +924,7 @@ struct smb2_lock_rsp {
} __packed;
struct smb2_echo_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__u16 Reserved;
} __packed;
@@ -942,7 +942,7 @@ struct smb2_echo_rsp {
#define SMB2_REOPEN 0x10
struct smb2_query_directory_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
__u8 FileInformationClass;
__u8 Flags;
@@ -989,7 +989,7 @@ struct smb2_query_directory_rsp {
#define SL_INDEX_SPECIFIED 0x00000004
struct smb2_query_info_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 41 */
__u8 InfoType;
__u8 FileInfoClass;
@@ -1013,7 +1013,7 @@ struct smb2_query_info_rsp {
} __packed;
struct smb2_set_info_req {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
__u8 InfoType;
__u8 FileInfoClass;
@@ -1031,7 +1031,19 @@ struct smb2_set_info_rsp {
__le16 StructureSize; /* Must be 2 */
} __packed;
-struct smb2_oplock_break {
+/* oplock break without an rfc1002 header */
struct smb2_oplock_break_req {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__u8 OplockLevel;
__u8 Reserved;
__le32 Reserved2;
__u64 PersistentFid;
__u64 VolatileFid;
} __packed;
/* oplock break with an rfc1002 header */
struct smb2_oplock_break_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 24 */
__u8 OplockLevel;
@@ -1057,7 +1069,7 @@ struct smb2_lease_break {
} __packed;
struct smb2_lease_ack {
-struct smb2_hdr hdr;
+struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 36 */
__le16 Reserved;
__le32 Flags;
......
@@ -125,8 +125,7 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
struct smb2_err_rsp **err_buf);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
-bool is_fsctl, bool use_ipc,
-char *in_data, u32 indatalen,
+bool is_fsctl, char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
......
/*
* Copyright (C) 2017, Microsoft Corporation.
*
* Author(s): Long Li <longli@microsoft.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/highmem.h>
#include "smbdirect.h"
#include "cifs_debug.h"
static struct smbd_response *get_empty_queue_buffer(
struct smbd_connection *info);
static struct smbd_response *get_receive_buffer(
struct smbd_connection *info);
static void put_receive_buffer(
struct smbd_connection *info,
struct smbd_response *response);
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
static void destroy_receive_buffers(struct smbd_connection *info);
static void put_empty_packet(
struct smbd_connection *info, struct smbd_response *response);
static void enqueue_reassembly(
struct smbd_connection *info,
struct smbd_response *response, int data_length);
static struct smbd_response *_get_first_reassembly(
struct smbd_connection *info);
static int smbd_post_recv(
struct smbd_connection *info,
struct smbd_response *response);
static int smbd_post_send_empty(struct smbd_connection *info);
static int smbd_post_send_data(
struct smbd_connection *info,
struct kvec *iov, int n_vec, int remaining_data_length);
static int smbd_post_send_page(struct smbd_connection *info,
struct page *page, unsigned long offset,
size_t size, int remaining_data_length);
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);
/* SMBD version number */
#define SMBD_V1 0x0100
/* Port numbers for SMBD transport */
#define SMB_PORT 445
#define SMBD_PORT 5445
/* Address lookup and resolve timeout in ms */
#define RDMA_RESOLVE_TIMEOUT 5000
/* SMBD negotiation timeout in seconds */
#define SMBD_NEGOTIATE_TIMEOUT 120
/* SMBD minimum receive size and fragmented size defined in [MS-SMBD] */
#define SMBD_MIN_RECEIVE_SIZE 128
#define SMBD_MIN_FRAGMENTED_SIZE 131072
/*
* Default maximum number of RDMA read/write outstanding on this connection
* This value may be decreased during QP creation, based on hardware limits
*/
#define SMBD_CM_RESPONDER_RESOURCES 32
/* Maximum number of retries on data transfer operations */
#define SMBD_CM_RETRY 6
/* No need to retry on Receiver Not Ready since SMBD manages credits */
#define SMBD_CM_RNR_RETRY 0
/*
* User configurable initial values per SMBD transport connection
* as defined in [MS-SMBD] 3.1.1.1
* Those may change after a SMBD negotiation
*/
/* The local peer's maximum number of credits to grant to the peer */
int smbd_receive_credit_max = 255;
/* The remote peer's credit request of local peer */
int smbd_send_credit_target = 255;
/* The maximum single message size that can be sent to the remote peer */
int smbd_max_send_size = 1364;
/* The maximum fragmented upper-layer payload receive size supported */
int smbd_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
int smbd_max_receive_size = 8192;
/* The timeout to initiate send of a keepalive message on idle */
int smbd_keep_alive_interval = 120;
/*
* User configurable initial values for RDMA transport
* The actual values used may be lower and are limited to hardware capabilities
*/
/* Default maximum number of SGEs in a RDMA write/read */
int smbd_max_frmr_depth = 2048;
/* If payload is smaller than this in bytes, use RDMA send/recv, not read/write */
int rdma_readwrite_threshold = 4096;
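/*
 * This threshold is what the I/O paths in smb2pdu.c compare against,
 * e.g. "rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold"
 * in smb2_new_read_req(): with the 4096 default, a 512-byte read goes
 * inline over RDMA send/recv, while a 64KB read registers memory and
 * has the server RDMA-write into it. The effective per-connection
 * value is additionally clamped to max_fragmented_send_size in
 * process_negotiation_response() below.
 */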
/* Transport logging functions
* Log messages are grouped into classes, which can be OR'ed together to
* select what is logged, via the module parameter smbd_logging_class
* e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
* log_rdma_event()
*/
#define LOG_OUTGOING 0x1
#define LOG_INCOMING 0x2
#define LOG_READ 0x4
#define LOG_WRITE 0x8
#define LOG_RDMA_SEND 0x10
#define LOG_RDMA_RECV 0x20
#define LOG_KEEP_ALIVE 0x40
#define LOG_RDMA_EVENT 0x80
#define LOG_RDMA_MR 0x100
static unsigned int smbd_logging_class;
module_param(smbd_logging_class, uint, 0644);
MODULE_PARM_DESC(smbd_logging_class,
"Logging class for SMBD transport 0x0 to 0x100");
#define ERR 0x0
#define INFO 0x1
static unsigned int smbd_logging_level = ERR;
module_param(smbd_logging_level, uint, 0644);
MODULE_PARM_DESC(smbd_logging_level,
"Logging level for SMBD transport, 0 (default): error, 1: info");
#define log_rdma(level, class, fmt, args...) \
do { \
if (level <= smbd_logging_level || class & smbd_logging_class) \
cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
} while (0)
#define log_outgoing(level, fmt, args...) \
log_rdma(level, LOG_OUTGOING, fmt, ##args)
#define log_incoming(level, fmt, args...) \
log_rdma(level, LOG_INCOMING, fmt, ##args)
#define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args)
#define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args)
#define log_rdma_send(level, fmt, args...) \
log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
#define log_rdma_recv(level, fmt, args...) \
log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
#define log_keep_alive(level, fmt, args...) \
log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
#define log_rdma_event(level, fmt, args...) \
log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
#define log_rdma_mr(level, fmt, args...) \
log_rdma(level, LOG_RDMA_MR, fmt, ##args)
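/*
 * Example (assuming cifs.ko is loaded): enabling just the MR class at
 * runtime through the writable module parameter declared above,
 *
 *	echo 0x100 > /sys/module/cifs/parameters/smbd_logging_class
 *
 * leaves smbd_logging_level at 0 (errors only) yet emits every
 * log_rdma_mr() message, while e.g. log_read(INFO, ...) stays filtered.
 */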
/*
* Destroy the transport and related RDMA and memory resources
* Need to go through all the pending counters and make sure no one is using
* the transport while it is destroyed
*/
static void smbd_destroy_rdma_work(struct work_struct *work)
{
struct smbd_response *response;
struct smbd_connection *info =
container_of(work, struct smbd_connection, destroy_work);
unsigned long flags;
log_rdma_event(INFO, "destroying qp\n");
ib_drain_qp(info->id->qp);
rdma_destroy_qp(info->id);
/* Unblock all I/O waiting on the send queue */
wake_up_interruptible_all(&info->wait_send_queue);
log_rdma_event(INFO, "cancelling idle timer\n");
cancel_delayed_work_sync(&info->idle_timer_work);
log_rdma_event(INFO, "cancelling send immediate work\n");
cancel_delayed_work_sync(&info->send_immediate_work);
log_rdma_event(INFO, "wait for all send to finish\n");
wait_event(info->wait_smbd_send_pending,
info->smbd_send_pending == 0);
log_rdma_event(INFO, "wait for all recv to finish\n");
wake_up_interruptible(&info->wait_reassembly_queue);
wait_event(info->wait_smbd_recv_pending,
info->smbd_recv_pending == 0);
log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
wait_event(info->wait_send_pending,
atomic_read(&info->send_pending) == 0);
wait_event(info->wait_send_payload_pending,
atomic_read(&info->send_payload_pending) == 0);
log_rdma_event(INFO, "freeing mr list\n");
wake_up_interruptible_all(&info->wait_mr);
wait_event(info->wait_for_mr_cleanup,
atomic_read(&info->mr_used_count) == 0);
destroy_mr_list(info);
/* It's not possible for the upper layer to get to reassembly */
log_rdma_event(INFO, "drain the reassembly queue\n");
do {
spin_lock_irqsave(&info->reassembly_queue_lock, flags);
response = _get_first_reassembly(info);
if (response) {
list_del(&response->list);
spin_unlock_irqrestore(
&info->reassembly_queue_lock, flags);
put_receive_buffer(info, response);
}
} while (response);
spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
info->reassembly_data_length = 0;
log_rdma_event(INFO, "free receive buffers\n");
wait_event(info->wait_receive_queues,
info->count_receive_queue + info->count_empty_packet_queue
== info->receive_credit_max);
destroy_receive_buffers(info);
ib_free_cq(info->send_cq);
ib_free_cq(info->recv_cq);
ib_dealloc_pd(info->pd);
rdma_destroy_id(info->id);
/* free mempools */
mempool_destroy(info->request_mempool);
kmem_cache_destroy(info->request_cache);
mempool_destroy(info->response_mempool);
kmem_cache_destroy(info->response_cache);
info->transport_status = SMBD_DESTROYED;
wake_up_all(&info->wait_destroy);
}
static int smbd_process_disconnected(struct smbd_connection *info)
{
schedule_work(&info->destroy_work);
return 0;
}
static void smbd_disconnect_rdma_work(struct work_struct *work)
{
struct smbd_connection *info =
container_of(work, struct smbd_connection, disconnect_work);
if (info->transport_status == SMBD_CONNECTED) {
info->transport_status = SMBD_DISCONNECTING;
rdma_disconnect(info->id);
}
}
static void smbd_disconnect_rdma_connection(struct smbd_connection *info)
{
queue_work(info->workqueue, &info->disconnect_work);
}
/* Upcall from RDMA CM */
static int smbd_conn_upcall(
struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct smbd_connection *info = id->context;
log_rdma_event(INFO, "event=%d status=%d\n",
event->event, event->status);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
info->ri_rc = 0;
complete(&info->ri_done);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
info->ri_rc = -EHOSTUNREACH;
complete(&info->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
info->ri_rc = -ENETUNREACH;
complete(&info->ri_done);
break;
case RDMA_CM_EVENT_ESTABLISHED:
log_rdma_event(INFO, "connected event=%d\n", event->event);
info->transport_status = SMBD_CONNECTED;
wake_up_interruptible(&info->conn_wait);
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
info->transport_status = SMBD_DISCONNECTED;
wake_up_interruptible(&info->conn_wait);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED:
/* This happens when we fail the negotiation */
if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
info->transport_status = SMBD_DISCONNECTED;
wake_up(&info->conn_wait);
break;
}
info->transport_status = SMBD_DISCONNECTED;
smbd_process_disconnected(info);
break;
default:
break;
}
return 0;
}
/* Upcall from RDMA QP */
static void
smbd_qp_async_error_upcall(struct ib_event *event, void *context)
{
struct smbd_connection *info = context;
log_rdma_event(ERR, "%s on device %s info %p\n",
ib_event_msg(event->event), event->device->name, info);
switch (event->event) {
case IB_EVENT_CQ_ERR:
case IB_EVENT_QP_FATAL:
smbd_disconnect_rdma_connection(info);
default:
break;
}
}
static inline void *smbd_request_payload(struct smbd_request *request)
{
return (void *)request->packet;
}
static inline void *smbd_response_payload(struct smbd_response *response)
{
return (void *)response->packet;
}
/* Called when a RDMA send is done */
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
int i;
struct smbd_request *request =
container_of(wc->wr_cqe, struct smbd_request, cqe);
log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
request, wc->status);
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
wc->status, wc->opcode);
smbd_disconnect_rdma_connection(request->info);
}
for (i = 0; i < request->num_sge; i++)
ib_dma_unmap_single(request->info->id->device,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
if (request->has_payload) {
if (atomic_dec_and_test(&request->info->send_payload_pending))
wake_up(&request->info->wait_send_payload_pending);
} else {
if (atomic_dec_and_test(&request->info->send_pending))
wake_up(&request->info->wait_send_pending);
}
mempool_free(request, request->info->request_mempool);
}
static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
{
log_rdma_event(INFO, "resp message min_version %u max_version %u "
"negotiated_version %u credits_requested %u "
"credits_granted %u status %u max_readwrite_size %u "
"preferred_send_size %u max_receive_size %u "
"max_fragmented_size %u\n",
resp->min_version, resp->max_version, resp->negotiated_version,
resp->credits_requested, resp->credits_granted, resp->status,
resp->max_readwrite_size, resp->preferred_send_size,
resp->max_receive_size, resp->max_fragmented_size);
}
/*
* Process a negotiation response message, according to [MS-SMBD]3.1.5.7
* response, packet_length: the negotiation response message
* return value: true if negotiation is a success, false if failed
*/
static bool process_negotiation_response(
struct smbd_response *response, int packet_length)
{
struct smbd_connection *info = response->info;
struct smbd_negotiate_resp *packet = smbd_response_payload(response);
if (packet_length < sizeof(struct smbd_negotiate_resp)) {
log_rdma_event(ERR,
"error: packet_length=%d\n", packet_length);
return false;
}
if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
log_rdma_event(ERR, "error: negotiated_version=%x\n",
le16_to_cpu(packet->negotiated_version));
return false;
}
info->protocol = le16_to_cpu(packet->negotiated_version);
if (packet->credits_requested == 0) {
log_rdma_event(ERR, "error: credits_requested==0\n");
return false;
}
info->receive_credit_target = le16_to_cpu(packet->credits_requested);
if (packet->credits_granted == 0) {
log_rdma_event(ERR, "error: credits_granted==0\n");
return false;
}
atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));
atomic_set(&info->receive_credits, 0);
if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
log_rdma_event(ERR, "error: preferred_send_size=%d\n",
le32_to_cpu(packet->preferred_send_size));
return false;
}
info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
log_rdma_event(ERR, "error: max_receive_size=%d\n",
le32_to_cpu(packet->max_receive_size));
return false;
}
info->max_send_size = min_t(int, info->max_send_size,
le32_to_cpu(packet->max_receive_size));
if (le32_to_cpu(packet->max_fragmented_size) <
SMBD_MIN_FRAGMENTED_SIZE) {
log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
le32_to_cpu(packet->max_fragmented_size));
return false;
}
info->max_fragmented_send_size =
le32_to_cpu(packet->max_fragmented_size);
info->rdma_readwrite_threshold =
rdma_readwrite_threshold > info->max_fragmented_send_size ?
info->max_fragmented_send_size :
rdma_readwrite_threshold;
info->max_readwrite_size = min_t(u32,
le32_to_cpu(packet->max_readwrite_size),
info->max_frmr_depth * PAGE_SIZE);
info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
return true;
}
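/*
 * A worked example of the negotiation above, using this file's defaults
 * and assumed server values: the client offers max_send_size 1364 and
 * max_receive_size 8192. If the server replies with preferred_send_size
 * 1364, max_receive_size 8192 and max_fragmented_size 1048576, the
 * connection settles on max_receive_size = 1364 (shrunk to the peer's
 * preference), max_send_size = min(1364, 8192) = 1364, and
 * max_fragmented_send_size = 1048576, so a 1MB upper-layer payload can
 * still be sent as a train of fragments of at most 1364 bytes each.
 */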
/*
* Check and schedule to send an immediate packet
* This is used to extend credits to the remote peer to keep the transport busy
*/
static void check_and_send_immediate(struct smbd_connection *info)
{
if (info->transport_status != SMBD_CONNECTED)
return;
info->send_immediate = true;
/*
* Promptly send a packet if our peer is running low on receive
* credits
*/
if (atomic_read(&info->receive_credits) <
info->receive_credit_target - 1)
queue_delayed_work(
info->workqueue, &info->send_immediate_work, 0);
}
static void smbd_post_send_credits(struct work_struct *work)
{
int ret = 0;
int use_receive_queue = 1;
int rc;
struct smbd_response *response;
struct smbd_connection *info =
container_of(work, struct smbd_connection,
post_send_credits_work);
if (info->transport_status != SMBD_CONNECTED) {
wake_up(&info->wait_receive_queues);
return;
}
if (info->receive_credit_target >
atomic_read(&info->receive_credits)) {
while (true) {
if (use_receive_queue)
response = get_receive_buffer(info);
else
response = get_empty_queue_buffer(info);
if (!response) {
/* now switch to empty packet queue */
if (use_receive_queue) {
use_receive_queue = 0;
continue;
} else
break;
}
response->type = SMBD_TRANSFER_DATA;
response->first_segment = false;
rc = smbd_post_recv(info, response);
if (rc) {
log_rdma_recv(ERR,
"post_recv failed rc=%d\n", rc);
put_receive_buffer(info, response);
break;
}
ret++;
}
}
spin_lock(&info->lock_new_credits_offered);
info->new_credits_offered += ret;
spin_unlock(&info->lock_new_credits_offered);
atomic_add(ret, &info->receive_credits);
/* Check if we can post new receive and grant credits to peer */
check_and_send_immediate(info);
}
static void smbd_recv_done_work(struct work_struct *work)
{
struct smbd_connection *info =
container_of(work, struct smbd_connection, recv_done_work);
/*
* We may have new send credits granted from remote peer
* If any sender is blocked on lack of credits, unblock it
*/
if (atomic_read(&info->send_credits))
wake_up_interruptible(&info->wait_send_queue);
/*
* Check if we need to send something to remote peer to
* grant more credits or respond to KEEP_ALIVE packet
*/
check_and_send_immediate(info);
}
/* Called from softirq, when recv is done */
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct smbd_data_transfer *data_transfer;
struct smbd_response *response =
container_of(wc->wr_cqe, struct smbd_response, cqe);
struct smbd_connection *info = response->info;
int data_length = 0;
log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d "
"byte_len=%d pkey_index=%x\n",
response, response->type, wc->status, wc->opcode,
wc->byte_len, wc->pkey_index);
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
wc->status, wc->opcode);
smbd_disconnect_rdma_connection(info);
goto error;
}
ib_dma_sync_single_for_cpu(
wc->qp->device,
response->sge.addr,
response->sge.length,
DMA_FROM_DEVICE);
switch (response->type) {
/* SMBD negotiation response */
case SMBD_NEGOTIATE_RESP:
dump_smbd_negotiate_resp(smbd_response_payload(response));
info->full_packet_received = true;
info->negotiate_done =
process_negotiation_response(response, wc->byte_len);
complete(&info->negotiate_completion);
break;
/* SMBD data transfer packet */
case SMBD_TRANSFER_DATA:
data_transfer = smbd_response_payload(response);
data_length = le32_to_cpu(data_transfer->data_length);
/*
* If this is a packet with a data payload, place the data in the
* reassembly queue and wake up the reading thread
*/
if (data_length) {
if (info->full_packet_received)
response->first_segment = true;
if (le32_to_cpu(data_transfer->remaining_data_length))
info->full_packet_received = false;
else
info->full_packet_received = true;
enqueue_reassembly(
info,
response,
data_length);
} else
put_empty_packet(info, response);
if (data_length)
wake_up_interruptible(&info->wait_reassembly_queue);
atomic_dec(&info->receive_credits);
info->receive_credit_target =
le16_to_cpu(data_transfer->credits_requested);
atomic_add(le16_to_cpu(data_transfer->credits_granted),
&info->send_credits);
log_incoming(INFO, "data flags %d data_offset %d "
"data_length %d remaining_data_length %d\n",
le16_to_cpu(data_transfer->flags),
le32_to_cpu(data_transfer->data_offset),
le32_to_cpu(data_transfer->data_length),
le32_to_cpu(data_transfer->remaining_data_length));
/* Send a KEEP_ALIVE response right away if requested */
info->keep_alive_requested = KEEP_ALIVE_NONE;
if (le16_to_cpu(data_transfer->flags) &
SMB_DIRECT_RESPONSE_REQUESTED) {
info->keep_alive_requested = KEEP_ALIVE_PENDING;
}
queue_work(info->workqueue, &info->recv_done_work);
return;
default:
log_rdma_recv(ERR,
"unexpected response type=%d\n", response->type);
}
error:
put_receive_buffer(info, response);
}
static struct rdma_cm_id *smbd_create_id(
struct smbd_connection *info,
struct sockaddr *dstaddr, int port)
{
struct rdma_cm_id *id;
int rc;
__be16 *sport;
id = rdma_create_id(&init_net, smbd_conn_upcall, info,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
return id;
}
if (dstaddr->sa_family == AF_INET6)
sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
else
sport = &((struct sockaddr_in *)dstaddr)->sin_port;
*sport = htons(port);
init_completion(&info->ri_done);
info->ri_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
RDMA_RESOLVE_TIMEOUT);
if (rc) {
log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
goto out;
}
wait_for_completion_interruptible_timeout(
&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
rc = info->ri_rc;
if (rc) {
log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
goto out;
}
info->ri_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) {
log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
goto out;
}
wait_for_completion_interruptible_timeout(
&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
rc = info->ri_rc;
if (rc) {
log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
goto out;
}
return id;
out:
rdma_destroy_id(id);
return ERR_PTR(rc);
}
/*
* Test if FRWR (Fast Registration Work Requests) is supported on the device
* This implementation requires FRWR for RDMA read/write
* return value: true if it is supported
*/
static bool frwr_is_supported(struct ib_device_attr *attrs)
{
if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
return false;
if (attrs->max_fast_reg_page_list_len == 0)
return false;
return true;
}
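/*
 * Both attribute checks matter here: IB_DEVICE_MEM_MGT_EXTENSIONS
 * advertises the fast-registration verbs themselves (ib_alloc_mr and
 * IB_WR_REG_MR), while a nonzero max_fast_reg_page_list_len bounds how
 * many pages a single registration can cover; smbd_ia_open() below
 * clamps max_frmr_depth to that limit.
 */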
static int smbd_ia_open(
struct smbd_connection *info,
struct sockaddr *dstaddr, int port)
{
int rc;
info->id = smbd_create_id(info, dstaddr, port);
if (IS_ERR(info->id)) {
rc = PTR_ERR(info->id);
goto out1;
}
if (!frwr_is_supported(&info->id->device->attrs)) {
log_rdma_event(ERR,
"Fast Registration Work Requests "
"(FRWR) is not supported\n");
log_rdma_event(ERR,
"Device capability flags = %llx "
"max_fast_reg_page_list_len = %u\n",
info->id->device->attrs.device_cap_flags,
info->id->device->attrs.max_fast_reg_page_list_len);
rc = -EPROTONOSUPPORT;
goto out2;
}
info->max_frmr_depth = min_t(int,
smbd_max_frmr_depth,
info->id->device->attrs.max_fast_reg_page_list_len);
info->mr_type = IB_MR_TYPE_MEM_REG;
if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
info->mr_type = IB_MR_TYPE_SG_GAPS;
info->pd = ib_alloc_pd(info->id->device, 0);
if (IS_ERR(info->pd)) {
rc = PTR_ERR(info->pd);
log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
goto out2;
}
return 0;
out2:
rdma_destroy_id(info->id);
info->id = NULL;
out1:
return rc;
}
/*
* Send a negotiation request message to the peer
* The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
* After negotiation, the transport is connected and ready for
* carrying upper layer SMB payload
*/
static int smbd_post_send_negotiate_req(struct smbd_connection *info)
{
struct ib_send_wr send_wr, *send_wr_fail;
int rc = -ENOMEM;
struct smbd_request *request;
struct smbd_negotiate_req *packet;
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
if (!request)
return rc;
request->info = info;
packet = smbd_request_payload(request);
packet->min_version = cpu_to_le16(SMBD_V1);
packet->max_version = cpu_to_le16(SMBD_V1);
packet->reserved = 0;
packet->credits_requested = cpu_to_le16(info->send_credit_target);
packet->preferred_send_size = cpu_to_le32(info->max_send_size);
packet->max_receive_size = cpu_to_le32(info->max_receive_size);
packet->max_fragmented_size =
cpu_to_le32(info->max_fragmented_recv_size);
request->num_sge = 1;
request->sge[0].addr = ib_dma_map_single(
info->id->device, (void *)packet,
sizeof(*packet), DMA_TO_DEVICE);
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
rc = -EIO;
goto dma_mapping_failed;
}
request->sge[0].length = sizeof(*packet);
request->sge[0].lkey = info->pd->local_dma_lkey;
ib_dma_sync_single_for_device(
info->id->device, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);
request->cqe.done = send_done;
send_wr.next = NULL;
send_wr.wr_cqe = &request->cqe;
send_wr.sg_list = request->sge;
send_wr.num_sge = request->num_sge;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n",
request->sge[0].addr,
request->sge[0].length, request->sge[0].lkey);
request->has_payload = false;
atomic_inc(&info->send_pending);
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (!rc)
return 0;
/* if we reach here, post send failed */
log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
atomic_dec(&info->send_pending);
ib_dma_unmap_single(info->id->device, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);
dma_mapping_failed:
mempool_free(request, info->request_mempool);
return rc;
}
/*
 * Extend the credits to remote peer
 * This implements [MS-SMBD] 3.1.5.9
 * The idea is that we should extend credits to the remote peer as quickly
 * as allowed, to maintain data flow. We allocate as many receive buffers
 * as possible, and extend the receive credits to the remote peer
 * return value: the new credits being granted.
 */
static int manage_credits_prior_sending(struct smbd_connection *info)
{
int new_credits;
spin_lock(&info->lock_new_credits_offered);
new_credits = info->new_credits_offered;
info->new_credits_offered = 0;
spin_unlock(&info->lock_new_credits_offered);
return new_credits;
}
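/*
 * Illustrative example (not in the original patch): if ten receive buffers
 * have been reposted since the last send, new_credits_offered has been
 * counted up to 10 by the credit-posting work; the next outgoing packet
 * then carries credits_granted = 10, allowing the peer ten more sends.
 */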
/*
* Check if we need to send a KEEP_ALIVE message
 * The idle connection timer triggers a KEEP_ALIVE message when it expires
 * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have the peer
 * send back a response.
 * return value:
 * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
 * 0 otherwise
*/
static int manage_keep_alive_before_sending(struct smbd_connection *info)
{
if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
info->keep_alive_requested = KEEP_ALIVE_SENT;
return 1;
}
return 0;
}
/*
* Build and prepare the SMBD packet header
 * This function waits for available send credits and builds an SMBD packet
 * header. The caller may then optionally append a payload to the packet
 * after the header
 * input values
* size: the size of the payload
* remaining_data_length: remaining data to send if this is part of a
* fragmented packet
* output values
* request_out: the request allocated from this function
* return values: 0 on success, otherwise actual error code returned
*/
static int smbd_create_header(struct smbd_connection *info,
int size, int remaining_data_length,
struct smbd_request **request_out)
{
struct smbd_request *request;
struct smbd_data_transfer *packet;
int header_length;
int rc;
/* Wait for send credits. A SMBD packet needs one credit */
rc = wait_event_interruptible(info->wait_send_queue,
atomic_read(&info->send_credits) > 0 ||
info->transport_status != SMBD_CONNECTED);
if (rc)
return rc;
if (info->transport_status != SMBD_CONNECTED) {
log_outgoing(ERR, "disconnected not sending\n");
return -ENOENT;
}
atomic_dec(&info->send_credits);
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
if (!request) {
rc = -ENOMEM;
goto err;
}
request->info = info;
/* Fill in the packet header */
packet = smbd_request_payload(request);
packet->credits_requested = cpu_to_le16(info->send_credit_target);
packet->credits_granted =
cpu_to_le16(manage_credits_prior_sending(info));
info->send_immediate = false;
packet->flags = 0;
if (manage_keep_alive_before_sending(info))
packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
packet->reserved = 0;
if (!size)
packet->data_offset = 0;
else
packet->data_offset = cpu_to_le32(24);
packet->data_length = cpu_to_le32(size);
packet->remaining_data_length = cpu_to_le32(remaining_data_length);
packet->padding = 0;
log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
"data_offset=%d data_length=%d remaining_data_length=%d\n",
le16_to_cpu(packet->credits_requested),
le16_to_cpu(packet->credits_granted),
le32_to_cpu(packet->data_offset),
le32_to_cpu(packet->data_length),
le32_to_cpu(packet->remaining_data_length));
/* Map the packet to DMA */
header_length = sizeof(struct smbd_data_transfer);
/* If this is a packet without payload, don't send padding */
if (!size)
header_length = offsetof(struct smbd_data_transfer, padding);
request->num_sge = 1;
request->sge[0].addr = ib_dma_map_single(info->id->device,
(void *)packet,
header_length,
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
mempool_free(request, info->request_mempool);
rc = -EIO;
goto err;
}
request->sge[0].length = header_length;
request->sge[0].lkey = info->pd->local_dma_lkey;
*request_out = request;
return 0;
err:
atomic_inc(&info->send_credits);
return rc;
}
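/*
 * Layout note (added for clarity): in struct smbd_data_transfer, defined in
 * smbdirect.h below, the four __le16 and first three __le32 fields total
 * 20 bytes, so offsetof(struct smbd_data_transfer, padding) == 20; the
 * padding field brings the full header to 24 bytes, which is why
 * data_offset is set to 24 whenever a payload follows the header.
 */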
static void smbd_destroy_header(struct smbd_connection *info,
struct smbd_request *request)
{
ib_dma_unmap_single(info->id->device,
request->sge[0].addr,
request->sge[0].length,
DMA_TO_DEVICE);
mempool_free(request, info->request_mempool);
atomic_inc(&info->send_credits);
}
/* Post the send request */
static int smbd_post_send(struct smbd_connection *info,
struct smbd_request *request, bool has_payload)
{
struct ib_send_wr send_wr, *send_wr_fail;
int rc, i;
for (i = 0; i < request->num_sge; i++) {
log_rdma_send(INFO,
"rdma_request sge[%d] addr=%llu legnth=%u\n",
i, request->sge[0].addr, request->sge[0].length);
ib_dma_sync_single_for_device(
info->id->device,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
}
request->cqe.done = send_done;
send_wr.next = NULL;
send_wr.wr_cqe = &request->cqe;
send_wr.sg_list = request->sge;
send_wr.num_sge = request->num_sge;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
if (has_payload) {
request->has_payload = true;
atomic_inc(&info->send_payload_pending);
} else {
request->has_payload = false;
atomic_inc(&info->send_pending);
}
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (rc) {
log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
if (has_payload) {
if (atomic_dec_and_test(&info->send_payload_pending))
wake_up(&info->wait_send_payload_pending);
} else {
if (atomic_dec_and_test(&info->send_pending))
wake_up(&info->wait_send_pending);
}
} else
/* Reset timer for idle connection after packet is sent */
mod_delayed_work(info->workqueue, &info->idle_timer_work,
info->keep_alive_interval*HZ);
return rc;
}
static int smbd_post_send_sgl(struct smbd_connection *info,
struct scatterlist *sgl, int data_length, int remaining_data_length)
{
int num_sgs;
int i, rc;
struct smbd_request *request;
struct scatterlist *sg;
rc = smbd_create_header(
info, data_length, remaining_data_length, &request);
if (rc)
return rc;
num_sgs = sgl ? sg_nents(sgl) : 0;
for_each_sg(sgl, sg, num_sgs, i) {
request->sge[i+1].addr =
ib_dma_map_page(info->id->device, sg_page(sg),
sg->offset, sg->length, DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(
info->id->device, request->sge[i+1].addr)) {
rc = -EIO;
request->sge[i+1].addr = 0;
goto dma_mapping_failure;
}
request->sge[i+1].length = sg->length;
request->sge[i+1].lkey = info->pd->local_dma_lkey;
request->num_sge++;
}
rc = smbd_post_send(info, request, data_length);
if (!rc)
return 0;
dma_mapping_failure:
for (i = 1; i < request->num_sge; i++)
if (request->sge[i].addr)
ib_dma_unmap_single(info->id->device,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
smbd_destroy_header(info, request);
return rc;
}
/*
* Send a page
* page: the page to send
* offset: offset in the page to send
* size: length in the page to send
* remaining_data_length: remaining data to send in this payload
*/
static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
unsigned long offset, size_t size, int remaining_data_length)
{
struct scatterlist sgl;
sg_init_table(&sgl, 1);
sg_set_page(&sgl, page, size, offset);
return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
}
/*
* Send an empty message
 * An empty message is used to extend credits to the peer for keepalive
* while there is no upper layer payload to send at the time
*/
static int smbd_post_send_empty(struct smbd_connection *info)
{
info->count_send_empty++;
return smbd_post_send_sgl(info, NULL, 0, 0);
}
/*
* Send a data buffer
* iov: the iov array describing the data buffers
 * n_vec: number of entries in the iov array
 * remaining_data_length: remaining data to send following this packet
 * in a fragmented SMBD packet
*/
static int smbd_post_send_data(
struct smbd_connection *info, struct kvec *iov, int n_vec,
int remaining_data_length)
{
int i;
u32 data_length = 0;
struct scatterlist sgl[SMBDIRECT_MAX_SGE];
if (n_vec > SMBDIRECT_MAX_SGE) {
cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
return -ENOMEM;
}
sg_init_table(sgl, n_vec);
for (i = 0; i < n_vec; i++) {
data_length += iov[i].iov_len;
sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
}
return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
}
/*
* Post a receive request to the transport
* The remote peer can only send data when a receive request is posted
 * The interaction is controlled by the send/receive credit system
*/
static int smbd_post_recv(
struct smbd_connection *info, struct smbd_response *response)
{
struct ib_recv_wr recv_wr, *recv_wr_fail = NULL;
int rc = -EIO;
response->sge.addr = ib_dma_map_single(
info->id->device, response->packet,
info->max_receive_size, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(info->id->device, response->sge.addr))
return rc;
response->sge.length = info->max_receive_size;
response->sge.lkey = info->pd->local_dma_lkey;
response->cqe.done = recv_done;
recv_wr.wr_cqe = &response->cqe;
recv_wr.next = NULL;
recv_wr.sg_list = &response->sge;
recv_wr.num_sge = 1;
rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail);
if (rc) {
ib_dma_unmap_single(info->id->device, response->sge.addr,
response->sge.length, DMA_FROM_DEVICE);
log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
}
return rc;
}
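/*
 * Note (added for clarity): every receive posted here backs one receive
 * credit that can later be extended to the peer; since the peer only sends
 * when it holds a credit, a receive buffer is guaranteed to be ready for
 * each inbound send.
 */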
/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
static int smbd_negotiate(struct smbd_connection *info)
{
int rc;
struct smbd_response *response = get_receive_buffer(info);
response->type = SMBD_NEGOTIATE_RESP;
rc = smbd_post_recv(info, response);
log_rdma_event(INFO,
"smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
"iov.lkey=%x\n",
rc, response->sge.addr,
response->sge.length, response->sge.lkey);
if (rc)
return rc;
init_completion(&info->negotiate_completion);
info->negotiate_done = false;
rc = smbd_post_send_negotiate_req(info);
if (rc)
return rc;
rc = wait_for_completion_interruptible_timeout(
&info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
if (info->negotiate_done)
return 0;
if (rc == 0)
rc = -ETIMEDOUT;
else if (rc == -ERESTARTSYS)
rc = -EINTR;
else
rc = -ENOTCONN;
return rc;
}
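/*
 * Illustrative summary (not in the original patch): the client offers
 * min_version = max_version = SMBD_V1 along with its credit and size
 * preferences in smbd_negotiate_req; the peer is expected to answer with
 * smbd_negotiate_resp (see smbdirect.h below) carrying the negotiated
 * version, granted credits and max_readwrite_size, and the receive path
 * then completes the negotiate_completion waited on above.
 */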
static void put_empty_packet(
struct smbd_connection *info, struct smbd_response *response)
{
spin_lock(&info->empty_packet_queue_lock);
list_add_tail(&response->list, &info->empty_packet_queue);
info->count_empty_packet_queue++;
spin_unlock(&info->empty_packet_queue_lock);
queue_work(info->workqueue, &info->post_send_credits_work);
}
/*
* Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
 * This is a queue for reassembling upper layer payload and presenting it to
 * the upper layer. All incoming payloads go to the reassembly queue,
 * regardless of whether reassembly is required. The upper layer code reads
 * all incoming payloads from this queue.
* Put a received packet to the reassembly queue
* response: the packet received
* data_length: the size of payload in this packet
*/
static void enqueue_reassembly(
struct smbd_connection *info,
struct smbd_response *response,
int data_length)
{
spin_lock(&info->reassembly_queue_lock);
list_add_tail(&response->list, &info->reassembly_queue);
info->reassembly_queue_length++;
/*
* Make sure reassembly_data_length is updated after list and
* reassembly_queue_length are updated. On the dequeue side
* reassembly_data_length is checked without a lock to determine
* if reassembly_queue_length and list is up to date
*/
virt_wmb();
info->reassembly_data_length += data_length;
spin_unlock(&info->reassembly_queue_lock);
info->count_reassembly_queue++;
info->count_enqueue_reassembly_queue++;
}
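/*
 * Note (added for clarity): the virt_wmb() above pairs with the virt_rmb()
 * in smbd_recv_buf(), so a reader that observes the updated
 * reassembly_data_length is guaranteed to also observe the new list entry
 * and the updated reassembly_queue_length.
 */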
/*
* Get the first entry at the front of reassembly queue
* Caller is responsible for locking
* return value: the first entry if any, NULL if queue is empty
*/
static struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
{
struct smbd_response *ret = NULL;
if (!list_empty(&info->reassembly_queue)) {
ret = list_first_entry(
&info->reassembly_queue,
struct smbd_response, list);
}
return ret;
}
static struct smbd_response *get_empty_queue_buffer(
struct smbd_connection *info)
{
struct smbd_response *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
if (!list_empty(&info->empty_packet_queue)) {
ret = list_first_entry(
&info->empty_packet_queue,
struct smbd_response, list);
list_del(&ret->list);
info->count_empty_packet_queue--;
}
spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
return ret;
}
/*
* Get a receive buffer
* For each remote send, we need to post a receive. The receive buffers are
 * pre-allocated.
* return value: the receive buffer, NULL if none is available
*/
static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
{
struct smbd_response *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&info->receive_queue_lock, flags);
if (!list_empty(&info->receive_queue)) {
ret = list_first_entry(
&info->receive_queue,
struct smbd_response, list);
list_del(&ret->list);
info->count_receive_queue--;
info->count_get_receive_buffer++;
}
spin_unlock_irqrestore(&info->receive_queue_lock, flags);
return ret;
}
/*
* Return a receive buffer
 * When a receive buffer is returned, we can post a new receive and extend
 * more receive credits to the remote peer. This is done immediately after a
* receive buffer is returned.
*/
static void put_receive_buffer(
struct smbd_connection *info, struct smbd_response *response)
{
unsigned long flags;
ib_dma_unmap_single(info->id->device, response->sge.addr,
response->sge.length, DMA_FROM_DEVICE);
spin_lock_irqsave(&info->receive_queue_lock, flags);
list_add_tail(&response->list, &info->receive_queue);
info->count_receive_queue++;
info->count_put_receive_buffer++;
spin_unlock_irqrestore(&info->receive_queue_lock, flags);
queue_work(info->workqueue, &info->post_send_credits_work);
}
/* Preallocate all receive buffer on transport establishment */
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
{
int i;
struct smbd_response *response;
INIT_LIST_HEAD(&info->reassembly_queue);
spin_lock_init(&info->reassembly_queue_lock);
info->reassembly_data_length = 0;
info->reassembly_queue_length = 0;
INIT_LIST_HEAD(&info->receive_queue);
spin_lock_init(&info->receive_queue_lock);
info->count_receive_queue = 0;
INIT_LIST_HEAD(&info->empty_packet_queue);
spin_lock_init(&info->empty_packet_queue_lock);
info->count_empty_packet_queue = 0;
init_waitqueue_head(&info->wait_receive_queues);
for (i = 0; i < num_buf; i++) {
response = mempool_alloc(info->response_mempool, GFP_KERNEL);
if (!response)
goto allocate_failed;
response->info = info;
list_add_tail(&response->list, &info->receive_queue);
info->count_receive_queue++;
}
return 0;
allocate_failed:
while (!list_empty(&info->receive_queue)) {
response = list_first_entry(
&info->receive_queue,
struct smbd_response, list);
list_del(&response->list);
info->count_receive_queue--;
mempool_free(response, info->response_mempool);
}
return -ENOMEM;
}
static void destroy_receive_buffers(struct smbd_connection *info)
{
struct smbd_response *response;
while ((response = get_receive_buffer(info)))
mempool_free(response, info->response_mempool);
while ((response = get_empty_queue_buffer(info)))
mempool_free(response, info->response_mempool);
}
/*
* Check and send an immediate or keep alive packet
 * The conditions for sending these packets are defined in [MS-SMBD] 3.1.1.1
 * Connection.KeepaliveRequested and Connection.SendImmediate
 * The idea is to extend credits to the server as soon as they become available
*/
static void send_immediate_work(struct work_struct *work)
{
struct smbd_connection *info = container_of(
work, struct smbd_connection,
send_immediate_work.work);
if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
info->send_immediate) {
log_keep_alive(INFO, "send an empty message\n");
smbd_post_send_empty(info);
}
}
/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
static void idle_connection_timer(struct work_struct *work)
{
struct smbd_connection *info = container_of(
work, struct smbd_connection,
idle_timer_work.work);
if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
log_keep_alive(ERR,
"error status info->keep_alive_requested=%d\n",
info->keep_alive_requested);
smbd_disconnect_rdma_connection(info);
return;
}
log_keep_alive(INFO, "about to send an empty idle message\n");
smbd_post_send_empty(info);
/* Setup the next idle timeout work */
queue_delayed_work(info->workqueue, &info->idle_timer_work,
info->keep_alive_interval*HZ);
}
/* Destroy this SMBD connection, called from upper layer */
void smbd_destroy(struct smbd_connection *info)
{
log_rdma_event(INFO, "destroying rdma session\n");
/* Kick off the disconnection process */
smbd_disconnect_rdma_connection(info);
log_rdma_event(INFO, "wait for transport being destroyed\n");
wait_event(info->wait_destroy,
info->transport_status == SMBD_DESTROYED);
destroy_workqueue(info->workqueue);
kfree(info);
}
/*
* Reconnect this SMBD connection, called from upper layer
* return value: 0 on success, or actual error code
*/
int smbd_reconnect(struct TCP_Server_Info *server)
{
log_rdma_event(INFO, "reconnecting rdma session\n");
if (!server->smbd_conn) {
log_rdma_event(ERR, "rdma session already destroyed\n");
return -EINVAL;
}
/*
* This is possible if transport is disconnected and we haven't received
* notification from RDMA, but upper layer has detected timeout
*/
if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
log_rdma_event(INFO, "disconnecting transport\n");
smbd_disconnect_rdma_connection(server->smbd_conn);
}
/* wait until the transport is destroyed */
wait_event(server->smbd_conn->wait_destroy,
server->smbd_conn->transport_status == SMBD_DESTROYED);
destroy_workqueue(server->smbd_conn->workqueue);
kfree(server->smbd_conn);
log_rdma_event(INFO, "creating rdma session\n");
server->smbd_conn = smbd_get_connection(
server, (struct sockaddr *) &server->dstaddr);
return server->smbd_conn ? 0 : -ENOENT;
}
static void destroy_caches_and_workqueue(struct smbd_connection *info)
{
destroy_receive_buffers(info);
destroy_workqueue(info->workqueue);
mempool_destroy(info->response_mempool);
kmem_cache_destroy(info->response_cache);
mempool_destroy(info->request_mempool);
kmem_cache_destroy(info->request_cache);
}
#define MAX_NAME_LEN 80
static int allocate_caches_and_workqueue(struct smbd_connection *info)
{
char name[MAX_NAME_LEN];
int rc;
snprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
info->request_cache =
kmem_cache_create(
name,
sizeof(struct smbd_request) +
sizeof(struct smbd_data_transfer),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!info->request_cache)
return -ENOMEM;
info->request_mempool =
mempool_create(info->send_credit_target, mempool_alloc_slab,
mempool_free_slab, info->request_cache);
if (!info->request_mempool)
goto out1;
snprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
info->response_cache =
kmem_cache_create(
name,
sizeof(struct smbd_response) +
info->max_receive_size,
0, SLAB_HWCACHE_ALIGN, NULL);
if (!info->response_cache)
goto out2;
info->response_mempool =
mempool_create(info->receive_credit_max, mempool_alloc_slab,
mempool_free_slab, info->response_cache);
if (!info->response_mempool)
goto out3;
snprintf(name, MAX_NAME_LEN, "smbd_%p", info);
info->workqueue = create_workqueue(name);
if (!info->workqueue)
goto out4;
rc = allocate_receive_buffers(info, info->receive_credit_max);
if (rc) {
log_rdma_event(ERR, "failed to allocate receive buffers\n");
goto out5;
}
return 0;
out5:
destroy_workqueue(info->workqueue);
out4:
mempool_destroy(info->response_mempool);
out3:
kmem_cache_destroy(info->response_cache);
out2:
mempool_destroy(info->request_mempool);
out1:
kmem_cache_destroy(info->request_cache);
return -ENOMEM;
}
/* Create a SMBD connection, called by upper layer */
static struct smbd_connection *_smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port)
{
int rc;
struct smbd_connection *info;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
struct ib_port_immutable port_immutable;
u32 ird_ord_hdr[2];
info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
if (!info)
return NULL;
info->transport_status = SMBD_CONNECTING;
rc = smbd_ia_open(info, dstaddr, port);
if (rc) {
log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
goto create_id_failed;
}
if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
log_rdma_event(ERR,
"consider lowering send_credit_target = %d. "
"Possible CQE overrun, device "
"reporting max_cqe %d max_qp_wr %d\n",
smbd_send_credit_target,
info->id->device->attrs.max_cqe,
info->id->device->attrs.max_qp_wr);
goto config_failed;
}
if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
log_rdma_event(ERR,
"consider lowering receive_credit_max = %d. "
"Possible CQE overrun, device "
"reporting max_cqe %d max_qp_wr %d\n",
smbd_receive_credit_max,
info->id->device->attrs.max_cqe,
info->id->device->attrs.max_qp_wr);
goto config_failed;
}
info->receive_credit_max = smbd_receive_credit_max;
info->send_credit_target = smbd_send_credit_target;
info->max_send_size = smbd_max_send_size;
info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
info->max_receive_size = smbd_max_receive_size;
info->keep_alive_interval = smbd_keep_alive_interval;
if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
info->id->device->attrs.max_sge);
log_rdma_event(ERR, "Queue Pair creation may fail\n");
}
info->send_cq = NULL;
info->recv_cq = NULL;
info->send_cq = ib_alloc_cq(info->id->device, info,
info->send_credit_target, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(info->send_cq)) {
info->send_cq = NULL;
goto alloc_cq_failed;
}
info->recv_cq = ib_alloc_cq(info->id->device, info,
info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(info->recv_cq)) {
info->recv_cq = NULL;
goto alloc_cq_failed;
}
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.event_handler = smbd_qp_async_error_upcall;
qp_attr.qp_context = info;
qp_attr.cap.max_send_wr = info->send_credit_target;
qp_attr.cap.max_recv_wr = info->receive_credit_max;
qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
qp_attr.cap.max_inline_data = 0;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = info->send_cq;
qp_attr.recv_cq = info->recv_cq;
qp_attr.port_num = ~0;
rc = rdma_create_qp(info->id, info->pd, &qp_attr);
if (rc) {
log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
goto create_qp_failed;
}
memset(&conn_param, 0, sizeof(conn_param));
conn_param.initiator_depth = 0;
conn_param.responder_resources =
info->id->device->attrs.max_qp_rd_atom
< SMBD_CM_RESPONDER_RESOURCES ?
info->id->device->attrs.max_qp_rd_atom :
SMBD_CM_RESPONDER_RESOURCES;
info->responder_resources = conn_param.responder_resources;
log_rdma_mr(INFO, "responder_resources=%d\n",
info->responder_resources);
/* Need to send IRD/ORD in private data for iWARP */
info->id->device->get_port_immutable(
info->id->device, info->id->port_num, &port_immutable);
if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
ird_ord_hdr[0] = info->responder_resources;
ird_ord_hdr[1] = 1;
conn_param.private_data = ird_ord_hdr;
conn_param.private_data_len = sizeof(ird_ord_hdr);
} else {
conn_param.private_data = NULL;
conn_param.private_data_len = 0;
}
conn_param.retry_count = SMBD_CM_RETRY;
conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
conn_param.flow_control = 0;
init_waitqueue_head(&info->wait_destroy);
log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
&addr_in->sin_addr, port);
init_waitqueue_head(&info->conn_wait);
rc = rdma_connect(info->id, &conn_param);
if (rc) {
log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
goto rdma_connect_failed;
}
wait_event_interruptible(
info->conn_wait, info->transport_status != SMBD_CONNECTING);
if (info->transport_status != SMBD_CONNECTED) {
log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
goto rdma_connect_failed;
}
log_rdma_event(INFO, "rdma_connect connected\n");
rc = allocate_caches_and_workqueue(info);
if (rc) {
log_rdma_event(ERR, "cache allocation failed\n");
goto allocate_cache_failed;
}
init_waitqueue_head(&info->wait_send_queue);
init_waitqueue_head(&info->wait_reassembly_queue);
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
queue_delayed_work(info->workqueue, &info->idle_timer_work,
info->keep_alive_interval*HZ);
init_waitqueue_head(&info->wait_smbd_send_pending);
info->smbd_send_pending = 0;
init_waitqueue_head(&info->wait_smbd_recv_pending);
info->smbd_recv_pending = 0;
init_waitqueue_head(&info->wait_send_pending);
atomic_set(&info->send_pending, 0);
init_waitqueue_head(&info->wait_send_payload_pending);
atomic_set(&info->send_payload_pending, 0);
INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
INIT_WORK(&info->destroy_work, smbd_destroy_rdma_work);
INIT_WORK(&info->recv_done_work, smbd_recv_done_work);
INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
info->new_credits_offered = 0;
spin_lock_init(&info->lock_new_credits_offered);
rc = smbd_negotiate(info);
if (rc) {
log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc);
goto negotiation_failed;
}
rc = allocate_mr_list(info);
if (rc) {
log_rdma_mr(ERR, "memory registration allocation failed\n");
goto allocate_mr_failed;
}
return info;
allocate_mr_failed:
/* At this point, we need a full transport shutdown */
smbd_destroy(info);
return NULL;
negotiation_failed:
cancel_delayed_work_sync(&info->idle_timer_work);
destroy_caches_and_workqueue(info);
info->transport_status = SMBD_NEGOTIATE_FAILED;
init_waitqueue_head(&info->conn_wait);
rdma_disconnect(info->id);
wait_event(info->conn_wait,
info->transport_status == SMBD_DISCONNECTED);
allocate_cache_failed:
rdma_connect_failed:
rdma_destroy_qp(info->id);
create_qp_failed:
alloc_cq_failed:
if (info->send_cq)
ib_free_cq(info->send_cq);
if (info->recv_cq)
ib_free_cq(info->recv_cq);
config_failed:
ib_dealloc_pd(info->pd);
rdma_destroy_id(info->id);
create_id_failed:
kfree(info);
return NULL;
}
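/*
 * Setup order recap (added for clarity): open the IA over rdma_cm, size
 * and allocate the completion queues and the QP, rdma_connect(), allocate
 * the packet caches and workqueue, run the SMBD negotiate (which posts the
 * first receive), and finally build the MR list. The error labels above
 * unwind these steps in reverse order.
 */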
struct smbd_connection *smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr)
{
struct smbd_connection *ret;
int port = SMBD_PORT;
try_again:
ret = _smbd_get_connection(server, dstaddr, port);
/* Try SMB_PORT if SMBD_PORT doesn't work */
if (!ret && port == SMBD_PORT) {
port = SMB_PORT;
goto try_again;
}
return ret;
}
/*
* Receive data from receive reassembly queue
* All the incoming data packets are placed in reassembly queue
* buf: the buffer to read data into
* size: the length of data to read
* return value: actual data read
 * Note: this implementation copies the data from the reassembly queue to
 * receive buffers used by the upper layer. This is not the optimal code path.
 * A better way to do it is to not have the upper layer allocate its receive
 * buffers but rather borrow the buffer from the reassembly queue, and return
 * it after the data is consumed. But this will require more changes to upper
 * layer code, and also needs to consider packet boundaries while they are
 * still being reassembled.
*/
static int smbd_recv_buf(struct smbd_connection *info, char *buf,
unsigned int size)
{
struct smbd_response *response;
struct smbd_data_transfer *data_transfer;
int to_copy, to_read, data_read, offset;
u32 data_length, remaining_data_length, data_offset;
int rc;
again:
if (info->transport_status != SMBD_CONNECTED) {
log_read(ERR, "disconnected\n");
return -ENODEV;
}
/*
* No need to hold the reassembly queue lock all the time as we are
* the only one reading from the front of the queue. The transport
* may add more entries to the back of the queue at the same time
*/
log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
info->reassembly_data_length);
if (info->reassembly_data_length >= size) {
int queue_length;
int queue_removed = 0;
/*
* Need to make sure reassembly_data_length is read before
* reading reassembly_queue_length and calling
* _get_first_reassembly. This call is lock free
 * as we never read the end of the queue, which is being
 * updated in SOFTIRQ context as more data is received
*/
virt_rmb();
queue_length = info->reassembly_queue_length;
data_read = 0;
to_read = size;
offset = info->first_entry_offset;
while (data_read < size) {
response = _get_first_reassembly(info);
data_transfer = smbd_response_payload(response);
data_length = le32_to_cpu(data_transfer->data_length);
remaining_data_length =
le32_to_cpu(
data_transfer->remaining_data_length);
data_offset = le32_to_cpu(data_transfer->data_offset);
/*
* The upper layer expects RFC1002 length at the
* beginning of the payload. Return it to indicate
 * the total length of the packet. This minimizes the
 * change to upper layer packet processing logic. This
 * will eventually be removed when an intermediate
 * transport layer is added
*/
if (response->first_segment && size == 4) {
unsigned int rfc1002_len =
data_length + remaining_data_length;
*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
data_read = 4;
response->first_segment = false;
log_read(INFO, "returning rfc1002 length %d\n",
rfc1002_len);
goto read_rfc1002_done;
}
to_copy = min_t(int, data_length - offset, to_read);
memcpy(
buf + data_read,
(char *)data_transfer + data_offset + offset,
to_copy);
/* move on to the next buffer? */
if (to_copy == data_length - offset) {
queue_length--;
/*
* No need to lock if we are not at the
* end of the queue
*/
if (!queue_length)
spin_lock_irq(
&info->reassembly_queue_lock);
list_del(&response->list);
queue_removed++;
if (!queue_length)
spin_unlock_irq(
&info->reassembly_queue_lock);
info->count_reassembly_queue--;
info->count_dequeue_reassembly_queue++;
put_receive_buffer(info, response);
offset = 0;
log_read(INFO, "put_receive_buffer offset=0\n");
} else
offset += to_copy;
to_read -= to_copy;
data_read += to_copy;
log_read(INFO, "_get_first_reassembly memcpy %d bytes "
"data_transfer_length-offset=%d after that "
"to_read=%d data_read=%d offset=%d\n",
to_copy, data_length - offset,
to_read, data_read, offset);
}
spin_lock_irq(&info->reassembly_queue_lock);
info->reassembly_data_length -= data_read;
info->reassembly_queue_length -= queue_removed;
spin_unlock_irq(&info->reassembly_queue_lock);
info->first_entry_offset = offset;
log_read(INFO, "returning to thread data_read=%d "
"reassembly_data_length=%d first_entry_offset=%d\n",
data_read, info->reassembly_data_length,
info->first_entry_offset);
read_rfc1002_done:
return data_read;
}
log_read(INFO, "wait_event on more data\n");
rc = wait_event_interruptible(
info->wait_reassembly_queue,
info->reassembly_data_length >= size ||
info->transport_status != SMBD_CONNECTED);
/* Don't return any data if interrupted */
if (rc)
return -ENODEV;
goto again;
}
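/*
 * Worked example (illustrative): for an SMB response of 65540 bytes that
 * arrives as two SMBD segments of 32770 bytes each, the caller's initial
 * 4-byte read returns an RFC1002 length of 65540 (data_length 32770 +
 * remaining_data_length 32770 from the first segment); subsequent reads
 * then drain the reassembled payload itself.
 */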
/*
* Receive a page from receive reassembly queue
* page: the page to read data into
* to_read: the length of data to read
* return value: actual data read
*/
static int smbd_recv_page(struct smbd_connection *info,
struct page *page, unsigned int to_read)
{
int ret;
char *to_address;
/* make sure we have the page ready for read */
ret = wait_event_interruptible(
info->wait_reassembly_queue,
info->reassembly_data_length >= to_read ||
info->transport_status != SMBD_CONNECTED);
if (ret)
return 0;
/* now we can read from reassembly queue and not sleep */
to_address = kmap_atomic(page);
log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
page, to_address, to_read);
ret = smbd_recv_buf(info, to_address, to_read);
kunmap_atomic(to_address);
return ret;
}
/*
* Receive data from transport
 * msg: a msghdr pointing to the buffer, can be ITER_KVEC or ITER_BVEC
* return: total bytes read, or 0. SMB Direct will not do partial read.
*/
int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
char *buf;
struct page *page;
unsigned int to_read;
int rc;
info->smbd_recv_pending++;
switch (msg->msg_iter.type) {
case READ | ITER_KVEC:
buf = msg->msg_iter.kvec->iov_base;
to_read = msg->msg_iter.kvec->iov_len;
rc = smbd_recv_buf(info, buf, to_read);
break;
case READ | ITER_BVEC:
page = msg->msg_iter.bvec->bv_page;
to_read = msg->msg_iter.bvec->bv_len;
rc = smbd_recv_page(info, page, to_read);
break;
default:
/* It's a bug in the upper layer to get here */
cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
msg->msg_iter.type);
rc = -EIO;
}
info->smbd_recv_pending--;
wake_up(&info->wait_smbd_recv_pending);
/* SMBDirect will read it all or nothing */
if (rc > 0)
msg->msg_iter.count = 0;
return rc;
}
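/*
 * Caller sketch (assumption, not part of this patch): the receive path is
 * expected to hand smbd_recv() a kvec- or bvec-backed msghdr, e.g.
 *
 *	struct kvec iov = { .iov_base = buf, .iov_len = to_read };
 *	struct msghdr smb_msg = {};
 *
 *	iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read);
 *	rc = smbd_recv(server->smbd_conn, &smb_msg);
 */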
/*
* Send data to transport
 * Each rqst is transported as an SMBDirect payload
 * rqst: the data to write
 * return value: 0 if successfully written, otherwise error code
*/
int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
{
struct kvec vec;
int nvecs;
int size;
int buflen = 0, remaining_data_length;
int start, i, j;
int max_iov_size =
info->max_send_size - sizeof(struct smbd_data_transfer);
struct kvec iov[SMBDIRECT_MAX_SGE];
int rc;
info->smbd_send_pending++;
if (info->transport_status != SMBD_CONNECTED) {
rc = -ENODEV;
goto done;
}
/*
* This usually means a configuration error
 * We use RDMA read/write for packet sizes > rdma_readwrite_threshold;
* as long as it's properly configured we should never get into this
* situation
*/
if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
log_write(ERR, "maximum send segment %x exceeding %x\n",
rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
rc = -EINVAL;
goto done;
}
/*
* Remove the RFC1002 length defined in MS-SMB2 section 2.1
* It is used only for TCP transport
* In future we may want to add a transport layer under protocol
* layer so this will only be issued to TCP transport
*/
iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
buflen += iov[0].iov_len;
/* total up iov array first */
for (i = 1; i < rqst->rq_nvec; i++) {
iov[i].iov_base = rqst->rq_iov[i].iov_base;
iov[i].iov_len = rqst->rq_iov[i].iov_len;
buflen += iov[i].iov_len;
}
/* add in the page array if there is one */
if (rqst->rq_npages) {
buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
buflen += rqst->rq_tailsz;
}
if (buflen + sizeof(struct smbd_data_transfer) >
info->max_fragmented_send_size) {
log_write(ERR, "payload size %d > max size %d\n",
buflen, info->max_fragmented_send_size);
rc = -EINVAL;
goto done;
}
remaining_data_length = buflen;
log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
"rq_tailsz=%d buflen=%d\n",
rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
rqst->rq_tailsz, buflen);
start = i = iov[0].iov_len ? 0 : 1;
buflen = 0;
while (true) {
buflen += iov[i].iov_len;
if (buflen > max_iov_size) {
if (i > start) {
remaining_data_length -=
(buflen-iov[i].iov_len);
log_write(INFO, "sending iov[] from start=%d "
"i=%d nvecs=%d "
"remaining_data_length=%d\n",
start, i, i-start,
remaining_data_length);
rc = smbd_post_send_data(
info, &iov[start], i-start,
remaining_data_length);
if (rc)
goto done;
} else {
/* iov[start] is too big, break it */
nvecs = (buflen+max_iov_size-1)/max_iov_size;
log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
" break to %d vectors\n",
start, iov[start].iov_base,
buflen, nvecs);
for (j = 0; j < nvecs; j++) {
vec.iov_base =
(char *)iov[start].iov_base +
j*max_iov_size;
vec.iov_len = max_iov_size;
if (j == nvecs-1)
vec.iov_len =
buflen -
max_iov_size*(nvecs-1);
remaining_data_length -= vec.iov_len;
log_write(INFO,
"sending vec j=%d iov_base=%p"
" iov_len=%zu "
"remaining_data_length=%d\n",
j, vec.iov_base, vec.iov_len,
remaining_data_length);
rc = smbd_post_send_data(
info, &vec, 1,
remaining_data_length);
if (rc)
goto done;
}
i++;
}
start = i;
buflen = 0;
} else {
i++;
if (i == rqst->rq_nvec) {
/* send out all remaining vecs */
remaining_data_length -= buflen;
log_write(INFO,
"sending iov[] from start=%d i=%d "
"nvecs=%d remaining_data_length=%d\n",
start, i, i-start,
remaining_data_length);
rc = smbd_post_send_data(info, &iov[start],
i-start, remaining_data_length);
if (rc)
goto done;
break;
}
}
log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
}
/* now sending pages if there are any */
for (i = 0; i < rqst->rq_npages; i++) {
buflen = (i == rqst->rq_npages-1) ?
rqst->rq_tailsz : rqst->rq_pagesz;
nvecs = (buflen + max_iov_size - 1) / max_iov_size;
log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
buflen, nvecs);
for (j = 0; j < nvecs; j++) {
size = max_iov_size;
if (j == nvecs-1)
size = buflen - j*max_iov_size;
remaining_data_length -= size;
log_write(INFO, "sending pages i=%d offset=%d size=%d"
" remaining_data_length=%d\n",
i, j*max_iov_size, size, remaining_data_length);
rc = smbd_post_send_page(
info, rqst->rq_pages[i], j*max_iov_size,
size, remaining_data_length);
if (rc)
goto done;
}
}
done:
/*
* As an optimization, we don't wait for individual I/O to finish
* before sending the next one.
 * Send them all and wait for the pending send count to reach 0,
 * which means all the I/Os have gone out and we are good to return
*/
wait_event(info->wait_send_payload_pending,
atomic_read(&info->send_payload_pending) == 0);
info->smbd_send_pending--;
wake_up(&info->wait_smbd_send_pending);
return rc;
}
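/*
 * Worked example (illustrative): assuming max_send_size = 1364 and the
 * 24-byte header, max_iov_size is 1340, so a single 4000-byte iov is split
 * into (4000 + 1340 - 1) / 1340 = 3 SMBD fragments, with
 * remaining_data_length counting down 2660, 1320, 0 across them.
 */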
static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct smbd_mr *mr;
struct ib_cqe *cqe;
if (wc->status) {
log_rdma_mr(ERR, "status=%d\n", wc->status);
cqe = wc->wr_cqe;
mr = container_of(cqe, struct smbd_mr, cqe);
smbd_disconnect_rdma_connection(mr->conn);
}
}
/*
* The work queue function that recovers MRs
* We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used
 * again. Both calls are slow, so we finish them in a workqueue so they do
 * not block the I/O path.
* There is one workqueue that recovers MRs, there is no need to lock as the
* I/O requests calling smbd_register_mr will never update the links in the
* mr_list.
*/
static void smbd_mr_recovery_work(struct work_struct *work)
{
struct smbd_connection *info =
container_of(work, struct smbd_connection, mr_recovery_work);
struct smbd_mr *smbdirect_mr;
int rc;
list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
if (smbdirect_mr->state == MR_INVALIDATED ||
smbdirect_mr->state == MR_ERROR) {
if (smbdirect_mr->state == MR_INVALIDATED) {
ib_dma_unmap_sg(
info->id->device, smbdirect_mr->sgl,
smbdirect_mr->sgl_count,
smbdirect_mr->dir);
smbdirect_mr->state = MR_READY;
} else if (smbdirect_mr->state == MR_ERROR) {
/* recover this MR entry */
rc = ib_dereg_mr(smbdirect_mr->mr);
if (rc) {
log_rdma_mr(ERR,
"ib_dereg_mr faield rc=%x\n",
rc);
smbd_disconnect_rdma_connection(info);
}
smbdirect_mr->mr = ib_alloc_mr(
info->pd, info->mr_type,
info->max_frmr_depth);
if (IS_ERR(smbdirect_mr->mr)) {
log_rdma_mr(ERR,
"ib_alloc_mr failed mr_type=%x "
"max_frmr_depth=%x\n",
info->mr_type,
info->max_frmr_depth);
smbd_disconnect_rdma_connection(info);
}
smbdirect_mr->state = MR_READY;
}
/* smbdirect_mr->state is updated by this function
* and is read and updated by I/O issuing CPUs trying
 * to get an MR, the call to atomic_inc_return
 * implies a memory barrier and guarantees this
* value is updated before waking up any calls to
* get_mr() from the I/O issuing CPUs
*/
if (atomic_inc_return(&info->mr_ready_count) == 1)
wake_up_interruptible(&info->wait_mr);
}
}
}
static void destroy_mr_list(struct smbd_connection *info)
{
struct smbd_mr *mr, *tmp;
cancel_work_sync(&info->mr_recovery_work);
list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
if (mr->state == MR_INVALIDATED)
ib_dma_unmap_sg(info->id->device, mr->sgl,
mr->sgl_count, mr->dir);
ib_dereg_mr(mr->mr);
kfree(mr->sgl);
kfree(mr);
}
}
/*
* Allocate MRs used for RDMA read/write
* The number of MRs will not exceed hardware capability in responder_resources
* All MRs are kept in mr_list. The MR can be recovered after it's used
* Recovery is done in smbd_mr_recovery_work. The content of list entry changes
* as MRs are used and recovered for I/O, but the list links will not change
*/
static int allocate_mr_list(struct smbd_connection *info)
{
int i;
struct smbd_mr *smbdirect_mr, *tmp;
INIT_LIST_HEAD(&info->mr_list);
init_waitqueue_head(&info->wait_mr);
spin_lock_init(&info->mr_list_lock);
atomic_set(&info->mr_ready_count, 0);
atomic_set(&info->mr_used_count, 0);
init_waitqueue_head(&info->wait_for_mr_cleanup);
/* Allocate more MRs (2x) than hardware responder_resources */
for (i = 0; i < info->responder_resources * 2; i++) {
smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
if (!smbdirect_mr)
goto out;
smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
info->max_frmr_depth);
if (IS_ERR(smbdirect_mr->mr)) {
log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x "
"max_frmr_depth=%x\n",
info->mr_type, info->max_frmr_depth);
goto out;
}
smbdirect_mr->sgl = kcalloc(
info->max_frmr_depth,
sizeof(struct scatterlist),
GFP_KERNEL);
if (!smbdirect_mr->sgl) {
log_rdma_mr(ERR, "failed to allocate sgl\n");
ib_dereg_mr(smbdirect_mr->mr);
goto out;
}
smbdirect_mr->state = MR_READY;
smbdirect_mr->conn = info;
list_add_tail(&smbdirect_mr->list, &info->mr_list);
atomic_inc(&info->mr_ready_count);
}
INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
return 0;
out:
kfree(smbdirect_mr);
list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
ib_dereg_mr(smbdirect_mr->mr);
kfree(smbdirect_mr->sgl);
kfree(smbdirect_mr);
}
return -ENOMEM;
}
/*
 * Get an MR from mr_list. This function waits until there is at least one
 * MR available in the list. It may access the list while the
 * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
 * as they never modify the same places. However, there may be several CPUs
 * issuing I/O trying to get an MR at the same time, so mr_list_lock is used
 * to protect this situation.
*/
static struct smbd_mr *get_mr(struct smbd_connection *info)
{
struct smbd_mr *ret;
int rc;
again:
rc = wait_event_interruptible(info->wait_mr,
atomic_read(&info->mr_ready_count) ||
info->transport_status != SMBD_CONNECTED);
if (rc) {
log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
return NULL;
}
if (info->transport_status != SMBD_CONNECTED) {
log_rdma_mr(ERR, "info->transport_status=%x\n",
info->transport_status);
return NULL;
}
spin_lock(&info->mr_list_lock);
list_for_each_entry(ret, &info->mr_list, list) {
if (ret->state == MR_READY) {
ret->state = MR_REGISTERED;
spin_unlock(&info->mr_list_lock);
atomic_dec(&info->mr_ready_count);
atomic_inc(&info->mr_used_count);
return ret;
}
}
spin_unlock(&info->mr_list_lock);
/*
 * It is possible that we could fail to get an MR because other processes
 * may try to acquire one at the same time. If this is the case, retry it.
*/
goto again;
}
/*
* Register memory for RDMA read/write
* pages[]: the list of pages to register memory with
* num_pages: the number of pages to register
* tailsz: if non-zero, the bytes to register in the last page
* writing: true if this is a RDMA write (SMB read), false for RDMA read
* need_invalidate: true if this MR needs to be locally invalidated after I/O
* return value: the MR registered, NULL if failed.
*/
struct smbd_mr *smbd_register_mr(
struct smbd_connection *info, struct page *pages[], int num_pages,
int tailsz, bool writing, bool need_invalidate)
{
struct smbd_mr *smbdirect_mr;
int rc, i;
enum dma_data_direction dir;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
if (num_pages > info->max_frmr_depth) {
log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
num_pages, info->max_frmr_depth);
return NULL;
}
smbdirect_mr = get_mr(info);
if (!smbdirect_mr) {
log_rdma_mr(ERR, "get_mr returning NULL\n");
return NULL;
}
smbdirect_mr->need_invalidate = need_invalidate;
smbdirect_mr->sgl_count = num_pages;
sg_init_table(smbdirect_mr->sgl, num_pages);
for (i = 0; i < num_pages - 1; i++)
sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0);
sg_set_page(&smbdirect_mr->sgl[i], pages[i],
tailsz ? tailsz : PAGE_SIZE, 0);
dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
smbdirect_mr->dir = dir;
rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir);
if (!rc) {
log_rdma_mr(INFO, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
num_pages, dir, rc);
goto dma_map_error;
}
rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages,
NULL, PAGE_SIZE);
if (rc != num_pages) {
log_rdma_mr(INFO,
"ib_map_mr_sg failed rc = %x num_pages = %x\n",
rc, num_pages);
goto map_mr_error;
}
ib_update_fast_reg_key(smbdirect_mr->mr,
ib_inc_rkey(smbdirect_mr->mr->rkey));
reg_wr = &smbdirect_mr->wr;
reg_wr->wr.opcode = IB_WR_REG_MR;
smbdirect_mr->cqe.done = register_mr_done;
reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = IB_SEND_SIGNALED;
reg_wr->mr = smbdirect_mr->mr;
reg_wr->key = smbdirect_mr->mr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
/*
 * There is no need to wait for completion of ib_post_send
 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
 * on the next ib_post_send when we actually send I/O to the remote peer
*/
rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr);
if (!rc)
return smbdirect_mr;
log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
rc, reg_wr->key);
/* If all failed, attempt to recover this MR by setting it to MR_ERROR */
map_mr_error:
ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl,
smbdirect_mr->sgl_count, smbdirect_mr->dir);
dma_map_error:
smbdirect_mr->state = MR_ERROR;
if (atomic_dec_and_test(&info->mr_used_count))
wake_up(&info->wait_for_mr_cleanup);
return NULL;
}
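/*
 * Usage sketch (assumption, not part of this patch): for an SMB read that
 * the server performs as an RDMA write, a caller would roughly do
 *
 *	struct smbd_mr *mr;
 *
 *	mr = smbd_register_mr(info, pages, npages, tailsz,
 *			      true, need_invalidate);
 *	if (!mr)
 *		return -EAGAIN;
 *	(send the SMB2 READ request describing the buffer to the server
 *	 as a smbd_buffer_descriptor_v1 built from mr->mr->rkey,
 *	 mr->mr->iova and the I/O length)
 *	rc = smbd_deregister_mr(mr);
 */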
static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct smbd_mr *smbdirect_mr;
struct ib_cqe *cqe;
cqe = wc->wr_cqe;
smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
smbdirect_mr->state = MR_INVALIDATED;
if (wc->status != IB_WC_SUCCESS) {
log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
smbdirect_mr->state = MR_ERROR;
}
complete(&smbdirect_mr->invalidate_done);
}
/*
 * Deregister an MR after I/O is done
 * This function may wait if remote invalidation is not used
 * and we have to locally invalidate the buffer to prevent the data from
 * being modified by the remote peer after the upper layer consumes it
*/
int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
{
struct ib_send_wr *wr, *bad_wr;
struct smbd_connection *info = smbdirect_mr->conn;
int rc = 0;
if (smbdirect_mr->need_invalidate) {
/* Need to finish local invalidation before returning */
wr = &smbdirect_mr->inv_wr;
wr->opcode = IB_WR_LOCAL_INV;
smbdirect_mr->cqe.done = local_inv_done;
wr->wr_cqe = &smbdirect_mr->cqe;
wr->num_sge = 0;
wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
wr->send_flags = IB_SEND_SIGNALED;
init_completion(&smbdirect_mr->invalidate_done);
rc = ib_post_send(info->id->qp, wr, &bad_wr);
if (rc) {
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
smbd_disconnect_rdma_connection(info);
goto done;
}
wait_for_completion(&smbdirect_mr->invalidate_done);
smbdirect_mr->need_invalidate = false;
} else
/*
* For remote invalidation, just set it to MR_INVALIDATED
* and defer to mr_recovery_work to recover the MR for next use
*/
smbdirect_mr->state = MR_INVALIDATED;
/*
* Schedule the work to do MR recovery for future I/Os
* MR recovery is slow and we don't want it to block the current I/O
*/
queue_work(info->workqueue, &info->mr_recovery_work);
done:
if (atomic_dec_and_test(&info->mr_used_count))
wake_up(&info->wait_for_mr_cleanup);
return rc;
}
/*
* Copyright (C) 2017, Microsoft Corporation.
*
* Author(s): Long Li <longli@microsoft.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*/
#ifndef _SMBDIRECT_H
#define _SMBDIRECT_H
#ifdef CONFIG_CIFS_SMB_DIRECT
#define cifs_rdma_enabled(server) ((server)->rdma)
#include "cifsglob.h"
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/mempool.h>
extern int rdma_readwrite_threshold;
extern int smbd_max_frmr_depth;
extern int smbd_keep_alive_interval;
extern int smbd_max_receive_size;
extern int smbd_max_fragmented_recv_size;
extern int smbd_max_send_size;
extern int smbd_send_credit_target;
extern int smbd_receive_credit_max;
enum keep_alive_status {
KEEP_ALIVE_NONE,
KEEP_ALIVE_PENDING,
KEEP_ALIVE_SENT,
};
enum smbd_connection_status {
SMBD_CREATED,
SMBD_CONNECTING,
SMBD_CONNECTED,
SMBD_NEGOTIATE_FAILED,
SMBD_DISCONNECTING,
SMBD_DISCONNECTED,
SMBD_DESTROYED
};
/*
* The context for the SMBDirect transport
* Everything related to the transport is here. It has several logical parts
* 1. RDMA related structures
* 2. SMBDirect connection parameters
* 3. Memory registrations
* 4. Receive and reassembly queues for data receive path
* 5. mempools for allocating packets
*/
struct smbd_connection {
enum smbd_connection_status transport_status;
/* RDMA related */
struct rdma_cm_id *id;
struct ib_qp_init_attr qp_attr;
struct ib_pd *pd;
struct ib_cq *send_cq, *recv_cq;
struct ib_device_attr dev_attr;
int ri_rc;
struct completion ri_done;
wait_queue_head_t conn_wait;
wait_queue_head_t wait_destroy;
struct completion negotiate_completion;
bool negotiate_done;
struct work_struct destroy_work;
struct work_struct disconnect_work;
struct work_struct recv_done_work;
struct work_struct post_send_credits_work;
spinlock_t lock_new_credits_offered;
int new_credits_offered;
/* Connection parameters defined in [MS-SMBD] 3.1.1.1 */
int receive_credit_max;
int send_credit_target;
int max_send_size;
int max_fragmented_recv_size;
int max_fragmented_send_size;
int max_receive_size;
int keep_alive_interval;
int max_readwrite_size;
enum keep_alive_status keep_alive_requested;
int protocol;
atomic_t send_credits;
atomic_t receive_credits;
int receive_credit_target;
int fragment_reassembly_remaining;
/* Memory registrations */
/* Maximum number of RDMA read/write outstanding on this connection */
int responder_resources;
/* Maximum number of SGEs in a RDMA write/read */
int max_frmr_depth;
/*
* If payload is less than or equal to the threshold,
* use RDMA send/recv to send upper layer I/O.
* If payload is more than the threshold,
* use RDMA read/write through memory registration for I/O.
*/
int rdma_readwrite_threshold;
enum ib_mr_type mr_type;
struct list_head mr_list;
spinlock_t mr_list_lock;
/* The number of available MRs ready for memory registration */
atomic_t mr_ready_count;
atomic_t mr_used_count;
wait_queue_head_t wait_mr;
struct work_struct mr_recovery_work;
/* Used by transport to wait until all MRs are returned */
wait_queue_head_t wait_for_mr_cleanup;
/* Activity accounting */
/* Pending requests issued from upper layer */
int smbd_send_pending;
wait_queue_head_t wait_smbd_send_pending;
int smbd_recv_pending;
wait_queue_head_t wait_smbd_recv_pending;
atomic_t send_pending;
wait_queue_head_t wait_send_pending;
atomic_t send_payload_pending;
wait_queue_head_t wait_send_payload_pending;
/* Receive queue */
struct list_head receive_queue;
int count_receive_queue;
spinlock_t receive_queue_lock;
struct list_head empty_packet_queue;
int count_empty_packet_queue;
spinlock_t empty_packet_queue_lock;
wait_queue_head_t wait_receive_queues;
/* Reassembly queue */
struct list_head reassembly_queue;
spinlock_t reassembly_queue_lock;
wait_queue_head_t wait_reassembly_queue;
/* total data length of reassembly queue */
int reassembly_data_length;
int reassembly_queue_length;
/* the offset to first buffer in reassembly queue */
int first_entry_offset;
bool send_immediate;
wait_queue_head_t wait_send_queue;
/*
* Indicate if we have received a full packet on the connection
 * This is used to identify the first SMBD packet of an assembled
* payload (SMB packet) in reassembly queue so we can return a
* RFC1002 length to upper layer to indicate the length of the SMB
* packet received
*/
bool full_packet_received;
struct workqueue_struct *workqueue;
struct delayed_work idle_timer_work;
struct delayed_work send_immediate_work;
/* Memory pool for preallocating buffers */
/* request pool for RDMA send */
struct kmem_cache *request_cache;
mempool_t *request_mempool;
/* response pool for RDMA receive */
struct kmem_cache *response_cache;
mempool_t *response_mempool;
/* for debug purposes */
unsigned int count_get_receive_buffer;
unsigned int count_put_receive_buffer;
unsigned int count_reassembly_queue;
unsigned int count_enqueue_reassembly_queue;
unsigned int count_dequeue_reassembly_queue;
unsigned int count_send_empty;
};
enum smbd_message_type {
SMBD_NEGOTIATE_RESP,
SMBD_TRANSFER_DATA,
};
#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */
struct smbd_negotiate_req {
__le16 min_version;
__le16 max_version;
__le16 reserved;
__le16 credits_requested;
__le32 preferred_send_size;
__le32 max_receive_size;
__le32 max_fragmented_size;
} __packed;
/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */
struct smbd_negotiate_resp {
__le16 min_version;
__le16 max_version;
__le16 negotiated_version;
__le16 reserved;
__le16 credits_requested;
__le16 credits_granted;
__le32 status;
__le32 max_readwrite_size;
__le32 preferred_send_size;
__le32 max_receive_size;
__le32 max_fragmented_size;
} __packed;
/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */
struct smbd_data_transfer {
__le16 credits_requested;
__le16 credits_granted;
__le16 flags;
__le16 reserved;
__le32 remaining_data_length;
__le32 data_offset;
__le32 data_length;
__le32 padding;
__u8 buffer[];
} __packed;
/* The packet fields for a registered RDMA buffer */
struct smbd_buffer_descriptor_v1 {
__le64 offset;
__le32 token;
__le32 length;
} __packed;
/* Default maximum number of SGEs in a RDMA send/recv */
#define SMBDIRECT_MAX_SGE 16
/* The context for a SMBD request */
struct smbd_request {
struct smbd_connection *info;
struct ib_cqe cqe;
/* true if this request carries upper layer payload */
bool has_payload;
/* the SGE entries for this packet */
struct ib_sge sge[SMBDIRECT_MAX_SGE];
int num_sge;
/* SMBD packet header follows this structure */
u8 packet[];
};
/* The context for a SMBD response */
struct smbd_response {
struct smbd_connection *info;
struct ib_cqe cqe;
struct ib_sge sge;
enum smbd_message_type type;
/* Link to receive queue or reassembly queue */
struct list_head list;
/* Indicate if this is the 1st packet of a payload */
bool first_segment;
/* SMBD packet header and payload follows this structure */
u8 packet[];
};
/* Create a SMBDirect session */
struct smbd_connection *smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr);
/* Reconnect SMBDirect session */
int smbd_reconnect(struct TCP_Server_Info *server);
/* Destroy SMBDirect session */
void smbd_destroy(struct smbd_connection *info);
/* Interface for carrying upper layer I/O through send/recv */
int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst);
enum mr_state {
MR_READY,
MR_REGISTERED,
MR_INVALIDATED,
MR_ERROR
};
struct smbd_mr {
struct smbd_connection *conn;
struct list_head list;
enum mr_state state;
struct ib_mr *mr;
struct scatterlist *sgl;
int sgl_count;
enum dma_data_direction dir;
union {
struct ib_reg_wr wr;
struct ib_send_wr inv_wr;
};
struct ib_cqe cqe;
bool need_invalidate;
struct completion invalidate_done;
};
/* Interfaces to register and deregister MR for RDMA read/write */
struct smbd_mr *smbd_register_mr(
struct smbd_connection *info, struct page *pages[], int num_pages,
int tailsz, bool writing, bool need_invalidate);
int smbd_deregister_mr(struct smbd_mr *mr);
#else
#define cifs_rdma_enabled(server) 0
struct smbd_connection {};
static inline void *smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;}
static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
static inline void smbd_destroy(struct smbd_connection *info) {}
static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; }
#endif
#endif
@@ -37,6 +37,10 @@
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "smbdirect.h"
+
+/* Max number of iovectors we can use off the stack when sending requests. */
+#define CIFS_MAX_IOV_SIZE 8
 
 void
 cifs_wake_up_task(struct mid_q_entry *mid)
@@ -229,7 +233,10 @@ __smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
     struct socket *ssocket = server->ssocket;
     struct msghdr smb_msg;
     int val = 1;
-
+    if (cifs_rdma_enabled(server) && server->smbd_conn) {
+        rc = smbd_send(server->smbd_conn, rqst);
+        goto smbd_done;
+    }
     if (ssocket == NULL)
         return -ENOTSOCK;
@@ -298,7 +305,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
      */
         server->tcpStatus = CifsNeedReconnect;
     }
-
+smbd_done:
     if (rc < 0 && rc != -EINTR)
         cifs_dbg(VFS, "Error %d sending data on socket to server\n",
             rc);
@@ -803,12 +810,16 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
          const int flags, struct kvec *resp_iov)
 {
     struct smb_rqst rqst;
-    struct kvec *new_iov;
+    struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
     int rc;
 
-    new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL);
-    if (!new_iov)
-        return -ENOMEM;
+    if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
+        new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
+                          GFP_KERNEL);
+        if (!new_iov)
+            return -ENOMEM;
+    } else
+        new_iov = s_iov;
 
     /* 1st iov is a RFC1001 length followed by the rest of the packet */
     memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
@@ -823,6 +834,50 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
     rqst.rq_nvec = n_vec + 1;
 
     rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
+    if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
+        kfree(new_iov);
+    return rc;
+}
+
+/* Like SendReceive2 but iov[0] does not contain an rfc1002 header */
+int
+smb2_send_recv(const unsigned int xid, struct cifs_ses *ses,
+               struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
+               const int flags, struct kvec *resp_iov)
+{
+    struct smb_rqst rqst;
+    struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
+    int rc;
+    int i;
+    __u32 count;
+    __be32 rfc1002_marker;
+
+    if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
+        new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
+                          GFP_KERNEL);
+        if (!new_iov)
+            return -ENOMEM;
+    } else
+        new_iov = s_iov;
+
+    /* 1st iov is an RFC1002 Session Message length */
+    memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
+
+    count = 0;
+    for (i = 1; i < n_vec + 1; i++)
+        count += new_iov[i].iov_len;
+
+    rfc1002_marker = cpu_to_be32(count);
+
+    new_iov[0].iov_base = &rfc1002_marker;
+    new_iov[0].iov_len = 4;
+
+    memset(&rqst, 0, sizeof(struct smb_rqst));
+    rqst.rq_iov = new_iov;
+    rqst.rq_nvec = n_vec + 1;
+
+    rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
+    if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
     kfree(new_iov);
     return rc;
 }
...