Docker运行phpRedisAdmin

vi /etc/redis.conf
bind 0.0.0.0 
protected-mode no
systemctl restart redis


docker pull erikdubbelboer/phpredisadmin:latest
docker run --rm -it -e REDIS_1_HOST=192.168.146.128 -e REDIS_1_NAME=MyRedis -p 8080:80 erikdubbelboer/phpredisadmin


http://192.168.146.133:8080/

CentOS 7 通过kubeadm安装Kubernetes 1.18.6

参考:
https://www.cnblogs.com/xiao987334176/p/12696740.html

系统        内核                         docker      ip                 主机名      配置
centos 7.8  3.10.0-1127.18.2.el7.x86_64  19.03.12    192.168.196.133    k8s-master  2核2G
centos 7.8  3.10.0-1127.18.2.el7.x86_64  19.03.12    192.168.196.134    k8s-node01  2核2G
centos 7.8  3.10.0-1127.18.2.el7.x86_64  19.03.12    192.168.196.135    k8s-node02  2核2G

设置固定IP地址:
vi /etc/sysconfig/network-scripts/ifcfg-ens33
BOOTPROTO=static
IPADDR=192.168.196.133
GATEWAY=192.168.196.2
NETMASK=255.255.255.0
DNS1=192.168.196.2

service network restart

设置系统主机名以及 Host 文件的相互解析:
在每个节点都需要设置,hostname可以根据自己的需求自定义
hostnamectl set-hostname k8s-master
cat /etc/hostname
echo '192.168.196.133 k8s-master' >> /etc/hosts
注意:主机名不能带下划线,只能带中划线
否则安装k8s会报错

安装依赖包,每个节点都需要安装这些依赖
yum install -y conntrack ntpdate ntp ipvsadm ipset jq iptables curl sysstat libseccomp wget vim net-tools git

禁用firewalld,启用iptables,并且清空iptables的规则,每个节点都要执行
systemctl stop firewalld && systemctl disable firewalld
yum -y install iptables-services && systemctl start iptables && systemctl enable iptables && iptables -F && service iptables save

关闭虚拟内存(swap),每个节点都需要执行,如果pod运行在虚拟内存中,会大大降低效率,因此最好关闭虚拟内存
swapoff -a && sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab

关闭SELINUX,每个节点都要执行
setenforce 0 && sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

调整内核参数,对于 K8S,每个节点都要执行
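(注:此处原文在转存时损坏——"<" 与 ">" 之间的内容被当作标签丢弃。从残留文本可知,缺失部分依次为:用 cat <<EOF 写入内核参数配置、用 cat > /etc/sysconfig/modules/ipvs.modules <<EOF 创建 ipvs 模块加载脚本,以及其后直到 kubectl 命令补全之间的步骤;请对照上文参考链接补全。)
配置 kubectl 命令自动补全:
echo "source <(kubectl completion bash)" >> ~/.bashrc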
source  ~/.bashrc
必须退出一次,再次登录,就可以了

由于kubeadm 默认从官网k8s.gcr.io下载所需镜像,国内无法访问,因此需要通过手动下载,
查看 kubeadm 会用到的镜像:
kubeadm config images list
k8s.gcr.io/kube-apiserver:v1.18.6
k8s.gcr.io/kube-controller-manager:v1.18.6
k8s.gcr.io/kube-scheduler:v1.18.6
k8s.gcr.io/kube-proxy:v1.18.6
k8s.gcr.io/pause:3.2
k8s.gcr.io/etcd:3.4.3-0
k8s.gcr.io/coredns:1.6.7

docker pull yannanfei0o/kube-apiserver:v1.18.6
docker pull yannanfei0o/kube-controller-manager:v1.18.6
docker pull yannanfei0o/kube-scheduler:v1.18.6
docker pull yannanfei0o/kube-proxy:v1.18.6
docker pull yannanfei0o/pause:3.2
docker pull yannanfei0o/etcd:3.4.3-0
docker pull yannanfei0o/coredns:1.6.7

docker tag yannanfei0o/kube-apiserver:v1.18.6 k8s.gcr.io/kube-apiserver:v1.18.6
docker tag yannanfei0o/kube-controller-manager:v1.18.6 k8s.gcr.io/kube-controller-manager:v1.18.6
docker tag yannanfei0o/kube-scheduler:v1.18.6 k8s.gcr.io/kube-scheduler:v1.18.6
docker tag yannanfei0o/kube-proxy:v1.18.6 k8s.gcr.io/kube-proxy:v1.18.6
docker tag yannanfei0o/pause:3.2 k8s.gcr.io/pause:3.2
docker tag yannanfei0o/etcd:3.4.3-0 k8s.gcr.io/etcd:3.4.3-0
docker tag yannanfei0o/coredns:1.6.7 k8s.gcr.io/coredns:1.6.7


部署 Kubernetes:
kubeadm init --kubernetes-version=1.18.6 --apiserver-advertise-address=k8s-master的IP地址 --service-cidr=10.1.0.0/16 --pod-network-cidr=10.244.0.0/16
参数解释:
--kubernetes-version: 用于指定k8s版本;
--apiserver-advertise-address:用于指定kube-apiserver监听的ip地址,就是 master本机IP地址。
--pod-network-cidr:用于指定Pod的网络范围; 10.244.0.0/16
--service-cidr:用于指定SVC的网络范围;
--image-repository: 指定阿里云镜像仓库地址(可选;本文已手动拉取镜像,故上面的命令未使用)

集群初始化成功后返回如下信息:
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.196.133:6443 --token xfwalx.n22j3hubdat8b3du \
    --discovery-token-ca-cert-hash sha256:806c47244a3f1284ca5bed54a58914e242541af9144b376b973a36bd74541321

注意保存好最后一句kubeadm join ...,后面会用到。
如果忘记,可以在master上执行:
kubeadm token create --print-join-command


配置kubectl工具,每个节点都要执行
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

mkdir k8s
cd k8s
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
如果yml中的"Network": "10.244.0.0/16"和kubeadm init xxx --pod-network-cidr不一样,就需要修改成一样的。不然可能会使得Node间Cluster IP不通。

查看yaml需要的镜像,每个节点都要执行
cat kube-flannel.yml |grep image|uniq
    image: quay.io/coreos/flannel:v0.12.0-amd64
    image: quay.io/coreos/flannel:v0.12.0-arm64
    image: quay.io/coreos/flannel:v0.12.0-arm
    image: quay.io/coreos/flannel:v0.12.0-ppc64le
    image: quay.io/coreos/flannel:v0.12.0-s390x
下载:
docker pull quay.io/coreos/flannel:v0.12.0-amd64
docker pull quay.io/coreos/flannel:v0.12.0-arm64
docker pull quay.io/coreos/flannel:v0.12.0-arm
docker pull quay.io/coreos/flannel:v0.12.0-ppc64le
docker pull quay.io/coreos/flannel:v0.12.0-s390x

加载flannel
kubectl apply -f kube-flannel.yml

查看Pod状态
等待几分钟,确保所有的Pod都处于Running状态,或者重启一下
kubectl get pod --all-namespaces -o wide

设置开机启动
systemctl enable kubelet

systemctl restart kubelet.service

node加入集群
修改主机名部分
hostnamectl set-hostname k8s-node01
cat /etc/hostname
echo '192.168.196.134 k8s-node01' >> /etc/hosts

hostnamectl set-hostname k8s-node02
cat /etc/hostname
echo '192.168.196.135 k8s-node02' >> /etc/hosts

加入节点
登录到node节点,确保已经安装了docker和kubeadm,kubelet,kubectl
kubeadm join 192.168.196.133:6443 --token xfwalx.n22j3hubdat8b3du \
    --discovery-token-ca-cert-hash sha256:806c47244a3f1284ca5bed54a58914e242541af9144b376b973a36bd74541321

如果报错:
[ERROR DirAvailable--etc-kubernetes-manifests]: /etc/kubernetes/manifests is not empty
执行:
kubeadm reset

查看节点
登录k8s-master执行
kubectl get nodes -o wide
NAME         STATUS   ROLES    AGE    VERSION   INTERNAL-IP       EXTERNAL-IP   OS-IMAGE                KERNEL-VERSION                CONTAINER-RUNTIME
k8s-master   Ready    master   65m    v1.18.6   192.168.196.133   <none>        CentOS Linux 7 (Core)   3.10.0-1127.18.2.el7.x86_64   docker://19.3.12
k8s-node01   Ready    <none>   3m2s   v1.18.6   192.168.196.134   <none>        CentOS Linux 7 (Core)   3.10.0-1127.18.2.el7.x86_64   docker://19.3.12
k8s-node02   Ready    <none>   38s    v1.18.6   192.168.196.135   <none>        CentOS Linux 7 (Core)   3.10.0-1127.18.2.el7.x86_64   docker://19.3.12

登录到node节点,查看 ifconfig
flannel.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
    inet 10.244.1.0  netmask 255.255.255.255  broadcast 0.0.0.0
    ether da:90:fe:ef:69:75  txqueuelen 0  (Ethernet)
    RX packets 0  bytes 0 (0.0 B)
    RX errors 0  dropped 0  overruns 0  frame 0
    TX packets 0  bytes 0 (0.0 B)
    TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
它会创建一个flannel.1网卡,用来做flannel网络通信的。
部署 nginx Deployment
登录k8s-master
vi nginx-deployment.yaml
apiVersion: apps/v1	#与k8s集群版本有关,使用 kubectl api-versions 即可查看当前集群支持的版本
kind: Deployment	#该配置的类型,我们使用的是 Deployment
metadata:	        #译名为元数据,即 Deployment 的一些基本属性和信息
  name: nginx-deployment	#Deployment 的名称
  labels:	    #标签,可以灵活定位一个或多个资源,其中key和value均可自定义,可以定义多组,目前不需要理解
    app: nginx	#为该Deployment设置key为app,value为nginx的标签
spec:	        #这是关于该Deployment的描述,可以理解为你期待该Deployment在k8s中如何使用
  replicas: 1	#使用该Deployment创建一个应用程序实例
  selector:	    #标签选择器,与上面的标签共同作用,目前不需要理解
    matchLabels: #选择包含标签app:nginx的资源
      app: nginx
  template:	    #这是选择或创建的Pod的模板
    metadata:	#Pod的元数据
      labels:	#Pod的标签,上面的selector即选择包含标签app:nginx的Pod
        app: nginx
    spec:	    #期望Pod实现的功能(即在pod中部署)
      containers:	#生成container,与docker中的container是同一种
      - name: nginx	#container的名称
        image: nginx:latest	#使用镜像nginx:latest创建container,该container默认80端口可访问

在各节点执行
docker pull nginx:latest

应用 YAML 文件
kubectl apply -f nginx-deployment.yaml

查看部署结果
# 查看 Deployment
kubectl get deployments

# 查看 Pod
kubectl get pods

#查看名称为nginx-XXXXXX的Pod的信息
kubectl describe pod nginx-deployment-674ff86d-m9d9f

查看pod中的容器的打印日志
kubectl logs -f nginx-deployment-674ff86d-m9d9f

在pod中的容器环境内执行命令
kubectl exec -it nginx-deployment-674ff86d-m9d9f -- /bin/bash


访问创建的Nginx server
为 nginx Deployment 创建一个 Service:
vi nginx-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: nginx-service	#Service 的名称
  labels:     	#Service 自己的标签
    app: nginx	#为该 Service 设置 key 为 app,value 为 nginx 的标签
spec:	    #这是关于该 Service 的定义,描述了 Service 如何选择 Pod,如何被访问
  selector:	    #标签选择器
    app: nginx	#选择包含标签 app:nginx 的 Pod
  ports:
  - name: http	#端口的名字, or https
    protocol: TCP	    #协议类型 TCP/UDP
    port: 80	        #集群内的其他容器组可通过 80 端口访问 Service
    nodePort: 32500   #通过任意节点的 32500 端口访问 Service
    targetPort: 80	#将请求转发到匹配 Pod 的 80 端口
  type: NodePort	#Service的类型,ClusterIP/NodePort/LoadBalancer

kubectl apply -f nginx-service.yaml

检查执行结果
kubectl get services -o wide

访问服务
curl <任意节点的 IP>:32500


pod重启
kubectl get pod pod名称 -n 命名空间名称 -o yaml | kubectl replace --force -f -
如果IP地址发生变化:
rm -rf /etc/kubernetes/*
rm -rf ~/.kube/*
rm -rf /var/lib/etcd/*

kubeadm reset

master节点重新初始化:
kubeadm init --apiserver-advertise-address ...

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

node加入master节点:
kubeadm join ...

scp -r /etc/kubernetes/admin.conf ${node1}:/root/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

重装flannel
#第一步,在master节点删除flannel
cd k8s
kubectl delete -f kube-flannel.yml
 
#第二步,在node节点清理flannel网络留下的文件
ifconfig cni0 down
ip link delete cni0
ifconfig flannel.1 down
ip link delete flannel.1
rm -rf /var/lib/cni/
rm -f /etc/cni/net.d/*
重启kubelet
systemctl restart docker
systemctl restart kubelet.service

#第三步,安装flannel
kubectl apply -f kube-flannel.yml
systemctl restart docker
systemctl restart kubelet.service
coredns-66bff467f8-4ctpf组件显示Completed,无IP地址
kubectl describe pod coredns-66bff467f8-4ctpf -n kube-system
出现
network: open /run/flannel/subnet.env: no such file or directory

mkdir /run/flannel
vi /run/flannel/subnet.env
FLANNEL_NETWORK=10.244.0.0/16
FLANNEL_SUBNET=10.244.0.1/24
FLANNEL_MTU=1450
FLANNEL_IPMASQ=true

systemctl restart docker
systemctl restart kubelet.service
kube-flannel-ds-amd64-4dz47显示CrashLoopBackOff
kubectl logs -n kube-system kube-flannel-ds-amd64-4dz47 -c kube-flannel
Failed to find any valid interface to use: failed to get default interface: Unable to find default route
出现这种情况有可能是网络配置错误导致无法连接外网,ping检查一下网络是否连通

Windows编译 RedisDesktopManager 2020.1

git clone --recursive https://github.com/uglide/RedisDesktopManager.git

Install python 3.7
Edit RedisDesktopManager\3rdparty\pyotherside.pri
win32* {
    QMAKE_LIBS += -LC:\Users\li\AppData\Local\Programs\Python\Python37\libs -lpython37
    INCLUDEPATH += C:\Users\li\AppData\Local\Programs\Python\Python37\include

cd 3rdparty/qredisclient/3rdparty/hiredis
git apply ../hiredis-win.patch
若无法自动patch,则需要手动修改

下载 nuget
https://dist.nuget.org/win-x86-commandline/latest/nuget.exe

使用国内的镜像源:
nuget sources Disable -Name "nuget.org"
nuget sources add -Name "huaweicloud" -Source "https://mirrors.huaweicloud.com/repository/nuget/v3/index.json"

cd 3rdparty
nuget install zlib-msvc14-x64 -Version 1.2.11.7795

Start QtCreator, Select MSVS_2019_x64, Build

cd RedisDesktopManager\bin\windows\release
windeployqt --release --qmldir D:\Qt\Qt5.14.2\5.14.2\msvc2017_64\qml rdm.exe

copy D:\Qt\Qt5.14.2\5.14.2\msvc2017_64\qml\QtCharts to RedisDesktopManager\bin\windows\release
copy D:\Qt\Qt5.14.2\5.14.2\msvc2017_64\qml\Qt\labs\qmlmodels to RedisDesktopManager\bin\windows\release\Qt\labs

Run rdm.exe

CentOS 7安装npm

wget https://nodejs.org/dist/v12.18.3/node-v12.18.3-linux-x64.tar.xz

tar -xvf node-v12.18.3-linux-x64.tar.xz -C .
mv node-v12.18.3-linux-x64 /usr/local/node

vim /etc/profile
Add:
#set for nodejs
export NODE_HOME=/usr/local/node
export PATH=$NODE_HOME/bin:$PATH

source /etc/profile
node -v
npm -v


设置镜像
npm config set registry https://registry.npm.taobao.org

VS Code/CLion + Rust

Extract rust_downloaded_use_rustup.7z and add these environment variables:
CARGO_HOME:F:\.cargo
RUSTUP_HOME:F:\.rustup
Add %CARGO_HOME%\bin to PATH

Or download rustup-init.exe, install by network, then:
rustup toolchain install stable-x86_64-pc-windows-msvc
rustup component add rust-analysis --toolchain stable-x86_64-pc-windows-msvc
rustup component add rust-src --toolchain stable-x86_64-pc-windows-msvc
rustup component add rls --toolchain stable-x86_64-pc-windows-msvc

rustup toolchain install stable-i686-pc-windows-msvc
rustup component add rust-analysis --toolchain stable-i686-pc-windows-msvc
rustup component add rust-src --toolchain stable-i686-pc-windows-msvc
rustup component add rls --toolchain stable-i686-pc-windows-msvc

rustup toolchain install stable-x86_64-pc-windows-gnu
rustup component add rust-analysis --toolchain stable-x86_64-pc-windows-gnu
rustup component add rust-src --toolchain stable-x86_64-pc-windows-gnu
rustup component add rls --toolchain stable-x86_64-pc-windows-gnu

rustup toolchain install stable-i686-pc-windows-gnu
rustup component add rust-analysis --toolchain stable-i686-pc-windows-gnu
rustup component add rust-src --toolchain stable-i686-pc-windows-gnu
rustup component add rls --toolchain stable-i686-pc-windows-gnu

rustup show


1:Use VS Code
Install VS Code x64-1.46.1, extract vscode_plugin.7z to C:\Users\li\.vscode, 
Or install the extensions below online:
Rust for Visual Studio Code (powered by Rust Language Server/Rust Analyzer)
C/C++
Better TOML

File - Preferences - Settings - Features - Debug:
Allow setting breakpoints in any file.

Terminal - Configure Tasks - Rust:Cargo build xxx:
copy the content of "label" and paste it into "preLaunchTask" below

Run - Start Debugging - C++ (Windows)
This should create and open launch.json, edit:
"preLaunchTask": "Rust: cargo build - xxx",
"program": "${workspaceRoot}/target/debug/${workspaceFolderBasename}.exe",
"externalConsole": true
If use MinGW, add:"miDebuggerPath": "D:/Qt/Qt5.14.2/Tools/mingw730_64/bin/gdb.exe",

Set breakpoint and press F5 to debug

Tips: msvc debugger looks better than gdb



2:Use CLion
rustup toolchain install stable-x86_64-pc-windows-gnu
rustup default stable-x86_64-pc-windows-gnu
rustup show
Download CLion-2020.1.2.exe, intellij-toml-0.2.124.40-201.zip, intellij-rust-0.2.125.3191-201.zip
Install CLion and plugins
If CLion is launched directly, an error is shown in Settings - Languages - Rust - Standard Library: (Download via rustup).
To fix it, open VS Code, which displays the same "Rust Src" message, and click Install.
CLion can debug Rust executables only with MinGW64; install it and set Settings - Build - Toolchains - MinGW - Environment: D:\Qt\Qt5.14.2\Tools\mingw730_64
You can now start debugging

VS2019离线安装

下载
vs_enterprise.exe --layout D:\vslayout --add Microsoft.VisualStudio.Workload.NativeDesktop --includeRecommended --add Microsoft.VisualStudio.Workload.ManagedDesktop --add Microsoft.VisualStudio.Component.VC.ATLMFC --lang zh-CN

断网,启动D:\vslayout\vs_setup.exe开始安装,注意不要运行vs_enterprise__xxxxxxx.exe



注意:Windows 7 SP1安装VS2019可能出现闪退问题,解决方法:
安装kb4474419-v3
https://www.catalog.update.microsoft.com/Search.aspx?q=KB4474419

Qt 5.14.1 配置Android SDK时异常

Qt 5.14.1 配置Android SDK时提示Error:
SDK tools installed.
Platform SDK tools installed.

网上查到的各种解决方法都无效。
最后自己发现一个解决方法:
下载 https://dl.google.com/android/repository/sdk-tools-windows-4333796.zip
解压到Android Sdk目录下

Git如何永久删除历史文件

通过以下命令,就可以永久删除你想要删除的任何文件:

git filter-branch --force --index-filter 'git rm --cached --ignore-unmatch path-to-your-remove-file' --prune-empty --tag-name-filter cat -- --all

将path-to-your-remove-file替换为删除文件的相对路径,并执行。如果有以下执行反馈,说明删除成功了

Ref 'refs/heads/master' was rewritten
Ref 'refs/remotes/origin/master' was rewritten
WARNING: Ref 'refs/remotes/origin/master' is unchanged
Ref 'refs/tags/0.1' was rewritten
Ref 'refs/tags/v0.2' was rewritten

如果所有分支都是unchanged说明要么是该分支没有要删除的文件,要么是删除文件的路径不对。

此时你会发现本地目录中的.git文件并不会马上就变小,而是与原来是一样的,
这是因为Git仓库历史有个缓存期,如果不主动回收、清理仓库历史,这些记录一般还会保存一段时间,以免你突然后悔时无法找回删掉的文件。
通过以下命令主动回收资源:

rm -rf .git/refs/original/
git reflog expire --expire=now --all
git gc --prune=now
git gc --aggressive --prune=now

执行以上命令,就会发现.git目录变小了。那么接下来只要把本地的记录,强制更新到远程仓库就行了。

git push origin --force --all
git push origin --force --tags
# 查看历史大文件
git rev-list --objects --all | grep "$(git verify-pack -v .git/objects/pack/*.idx | sort -k 3 -n | tail -5 | awk '{print$1}')"

单线程、SSE、AVX运行效率对比

测试硬件:CPU-i5-4590
命令行:/arch:AVX
优化项:/O2

main.cpp

#include <iostream>
#include <vector>
#include "method.h"
#include <random>
#include <time.h>

using std::default_random_engine;
using std::uniform_real_distribution;

// Returns the processor time elapsed since `start` (a previous clock() reading),
// converted to whole milliseconds via CLOCKS_PER_SEC so the printed unit is
// correct regardless of the platform's tick rate.
static long ElapsedMs(clock_t start)
{
	return (long)((double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC);
}

// Benchmark driver: fills two small float buffers with random values and times
// each multiply-accumulate kernel over a fixed number of repetitions, printing
// the last result and the elapsed time for each variant.
int main(int argc, char* argv[])
{
	// Multiply-accumulate (dot product) benchmark
	{
		const int size = 33;
		// std::vector owns the buffers, so there is no allocation result to
		// check and nothing to free on exit.
		std::vector<float> input1(size);
		std::vector<float> input2(size);

		default_random_engine e;
		uniform_real_distribution<float> u(0, 1); // random number distribution object
		for (int i = 0; i < size; i++)
		{
			input1[i] = u(e);
			input2[i] = u(e);
		}

		const int cntLoop = 10000000;

		clock_t start_t = clock();
		float org = 0.0;
		for (int i = 0; i < cntLoop; i++)
		{
			org = MathMulAdd(input1.data(), input2.data(), size);
		}
		printf("org = %f\t", org);
		printf("cost time: %ld(ms)\n", ElapsedMs(start_t));

		start_t = clock();
		float sse = 0.0;
		for (int i = 0; i < cntLoop; i++)
		{
			sse = SSEMulAdd(input1.data(), input2.data(), size);
		}
		printf("sse = %f\t", sse);
		printf("cost time: %ld(ms)\n", ElapsedMs(start_t));

		start_t = clock();
		float sse_ = 0.0;
		for (int i = 0; i < cntLoop; i++)
		{
			sse_ = SSEFmAdd(input1.data(), input2.data(), size);
		}
		printf("sse_= %f\t", sse_);
		printf("cost time: %ld(ms)\n", ElapsedMs(start_t));

		start_t = clock();
		float avx = 0.0;
		for (int i = 0; i < cntLoop; i++)
		{
			avx = AVXMulAdd(input1.data(), input2.data(), size);
		}
		printf("avx = %f\t", avx);
		printf("cost time: %ld(ms)\n", ElapsedMs(start_t));

		start_t = clock();
		float avx_ = 0.0;
		for (int i = 0; i < cntLoop; i++)
		{
			avx_ = AVXFmAdd(input1.data(), input2.data(), size);
		}
		printf("avx_= %f\t", avx_);
		printf("cost time: %ld(ms)\n", ElapsedMs(start_t));
	}
	// Recorded results:
	//org = 11.216135 cost time : 174(ms)
	//sse = 11.216136 cost time : 102(ms)
	//sse_ = 11.216136 cost time : 119(ms)
	//avx = 11.216136 cost time : 63(ms)
	//avx_ = 11.216136 cost time : 61(ms)


	// Summation benchmark (disabled; kept for reference)
	//{
	//	int size = 27;
	//	float *input = (float *)malloc(sizeof(float) * size);
	//	for (int i = 0; i < size; i++)
	//		input[i] = 0.0025;

	//	int cntLoop = 300000000;
	//	clock_t start_t = clock();
	//	float org = 0.0;
	//	for (int i = 0; i < cntLoop; i++)
	//		org = MathSum(input, size);
	//	printf("org = %f\t", org);
	//	printf("cost time: %d\n", clock() - start_t);

	//	start_t = clock();
	//	float sse = 0.0;
	//	for (int i = 0; i < cntLoop; i++)
	//		sse = SSESum(input, size);
	//	printf("sse = %f\t", sse);
	//	printf("cost time: %d\n", clock() - start_t);

	//	start_t = clock();
	//	float avx = 0.0;
	//	for (int i = 0; i < cntLoop; i++)
	//		avx = AVXSum(input, size);
	//	printf("avx = %f\t", avx);
	//	printf("cost time: %d\n", clock() - start_t);

	//	free(input);
	//}
	// Recorded results:
	//org = 0.067500  cost time : 3062
	//sse = 0.067500  cost time : 2283
	//avx = 0.067500  cost time : 1829


	// Maximum/minimum benchmark (disabled; kept for reference)
	//{
	//	int size = 58;
	//	float *input = (float *)malloc(sizeof(float) * size);

	//	default_random_engine e;
	//	uniform_real_distribution<float> u(0, 3); // random number distribution object
	//	for (int i = 0; i < size; i++)
	//	{
	//		input[i] = u(e);
	//		printf("%f ", input[i]);
	//		if ((i + 1) % 8 == 0)
	//			printf("\n");
	//	}
	//	printf("\n");

	//	int cntLoop = 100000000;
	//	clock_t start_t = clock();
	//	float org;
	//	for (int i = 0; i < cntLoop; i++)
	//		org = MathMax(input, size);
	//	printf("org = %f\t", org);
	//	printf("cost time: %d(ms)\n", clock() - start_t);

	//	start_t = clock();
	//	float sse;
	//	for (int i = 0; i < cntLoop; i++)
	//		sse = SSEMax(input, size);
	//	printf("sse = %f\t", sse);
	//	printf("cost time: %d(ms)\n", clock() - start_t);

	//	start_t = clock();
	//	float avx;
	//	for (int i = 0; i < cntLoop; i++)
	//		avx = AVXMax(input, size);
	//	printf("avx = %f\t", avx);
	//	printf("cost time: %d(ms)\n", clock() - start_t);

	//	free(input);
	//}
	// Recorded results:
	//org = 2.989384  cost time : 9491(ms)
	//sse = 2.989384  cost time : 1261(ms)
	//avx = 2.989384  cost time : 1413(ms)


	return 0;
}

method.h

#pragma once

#include <intrin.h>
#include <stdio.h>

// Multiply-accumulate kernels: each returns the sum of input1[i] * input2[i]
// over the first `size` elements.
// MathMulAdd is the plain scalar loop.
float MathMulAdd(const float *input1, const float *input2, int size);
// SSE variants process 4 floats per iteration (__m128) and finish the
// remainder with a scalar loop; SSEFmAdd uses a fused multiply-add.
// Both print a message and return -1 if either pointer is null.
float SSEMulAdd(const float *input1, const float *input2, int size);
float SSEFmAdd(const float *input1, const float *input2, int size);
// AVX variants process 8 floats per iteration (__m256); AVXFmAdd uses a
// fused multiply-add. Both print a message and return -1 if either pointer
// is null.
float AVXMulAdd(const float *input1, const float *input2, int size);
float AVXFmAdd(const float *input1, const float *input2, int size);


// Summation kernels: each returns the sum of the first `size` elements.
// The SSE/AVX variants print a message and return -1 if `input` is null.
float MathSum(const float *input, int size);
float SSESum(const float *input, int size);
float AVXSum(const float *input, int size);


// Maximum kernels: each returns the largest of the first `size` elements.
// The SSE/AVX variants print a message and return -1 if `input` is null.
float MathMax(const float *input, int size);
float SSEMax(const float *input, int size);
float AVXMax(const float *input, int size);

method.cpp

#include "method.h"


// Scalar reference kernel: accumulates input1[i] * input2[i] in index order
// over the first `size` elements and returns the total (0 when size <= 0).
float MathMulAdd(const float *input1, const float *input2, int size)
{
	float acc = 0.0;
	const float *lhs = input1;
	const float *rhs = input2;
	int remaining = size;
	while (remaining > 0)
	{
		acc += (*lhs) * (*rhs);
		++lhs;
		++rhs;
		--remaining;
	}
	return acc;
}

float SSEMulAdd(const float *input1, const float *input2, int size)
{
	if (input1 == nullptr || input2 == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 4;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m128 loadData1, loadData2;
	__m128 mulData = _mm_setzero_ps();
	__m128 sumData = _mm_setzero_ps();
	const float *p1 = input1;
	const float *p2 = input2;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData1 = _mm_load_ps(p1);
		loadData2 = _mm_load_ps(p2);
		mulData = _mm_mul_ps(loadData1, loadData2);
		sumData = _mm_add_ps(sumData, mulData);
		p1 += nBlockWidth;
		p2 += nBlockWidth;
	}
	sumData = _mm_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + ...
	sumData = _mm_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + ...
	output += sumData.m128_f32[(0)];         // 前4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p1[i] * p2[i];
	}

	return output;
}

float SSEFmAdd(const float *input1, const float *input2, int size)
{
	if (input1 == nullptr || input2 == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 4;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m128 loadData1, loadData2;
	__m128 sumData = _mm_setzero_ps();
	const float *p1 = input1;
	const float *p2 = input2;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData1 = _mm_load_ps(p1);
		loadData2 = _mm_load_ps(p2);
		sumData = _mm_fmadd_ps(loadData1, loadData2, sumData);
		p1 += nBlockWidth;
		p2 += nBlockWidth;
	}
	sumData = _mm_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + ...
	sumData = _mm_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + ...
	output += sumData.m128_f32[(0)];         // 前4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p1[i] * p2[i];
	}

	return output;
}

float AVXMulAdd(const float *input1, const float *input2, int size)
{
	if (input1 == nullptr || input2 == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 8;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m256 loadData1, loadData2;
	__m256 mulData = _mm256_setzero_ps();
	__m256 sumData = _mm256_setzero_ps();
	const float *p1 = input1;
	const float *p2 = input2;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData1 = _mm256_load_ps(p1);
		loadData2 = _mm256_load_ps(p2);
		mulData = _mm256_mul_ps(loadData1, loadData2);
		sumData = _mm256_add_ps(sumData, mulData);
		p1 += nBlockWidth;
		p2 += nBlockWidth;
	}
	sumData = _mm256_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + p[8] + p[9] + p[12] + p[13] + ... 
	sumData = _mm256_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + p[10] + p[11] + p[14] + p[15] + ... 
	output += sumData.m256_f32[(0)];            // 前4组
	output += sumData.m256_f32[(4)];            // 后4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p1[i] * p2[i];
	}

	return output;
}

float AVXFmAdd(const float *input1, const float *input2, int size)
{
	if (input1 == nullptr || input2 == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 8;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m256 loadData1, loadData2;
	__m256 sumData = _mm256_setzero_ps();
	const float *p1 = input1;
	const float *p2 = input2;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData1 = _mm256_load_ps(p1);
		loadData2 = _mm256_load_ps(p2);
		sumData = _mm256_fmadd_ps(loadData1, loadData2, sumData);
		p1 += nBlockWidth;
		p2 += nBlockWidth;
	}
	sumData = _mm256_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + p[8] + p[9] + p[12] + p[13] + ... 
	sumData = _mm256_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + p[10] + p[11] + p[14] + p[15] + ... 
	output += sumData.m256_f32[(0)];            // 前4组
	output += sumData.m256_f32[(4)];            // 后4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p1[i] * p2[i];
	}

	return output;
}



// Scalar reference kernel: adds the first `size` elements of `input` in index
// order and returns the total (0 when size <= 0).
float MathSum(const float *input, int size)
{
	float total = 0.0;
	const float *cursor = input;
	int remaining = size;
	while (remaining > 0)
	{
		total += *cursor;
		++cursor;
		--remaining;
	}
	return total;
}

float SSESum(const float *input, int size)
{
	if (input == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 4;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m128 loadData;
	__m128 sumData = _mm_setzero_ps();
	const float *p = input;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData = _mm_load_ps(p);
		sumData = _mm_add_ps(sumData, loadData);
		p += nBlockWidth;
	}
	sumData = _mm_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + ...
	sumData = _mm_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + ...
	output += sumData.m128_f32[(0)];         // 前4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p[i];
	}

	return output;
}

float AVXSum(const float *input, int size)
{
	if (input == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 8;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	float output = 0;
	__m256 loadData;
	__m256 sumData = _mm256_setzero_ps();
	const float *p = input;
	for (int i = 0; i < cntBlock; i++)
	{
		loadData = _mm256_load_ps(p);
		sumData = _mm256_add_ps(sumData, loadData);
		p += nBlockWidth;
	}
	sumData = _mm256_hadd_ps(sumData, sumData); // p[0] + p[1] + p[4] + p[5] + p[8] + p[9] + p[12] + p[13] + ... 
	sumData = _mm256_hadd_ps(sumData, sumData); // p[2] + p[3] + p[6] + p[7] + p[10] + p[11] + p[14] + p[15] + ... 
	output += sumData.m256_f32[(0)];            // 前4组
	output += sumData.m256_f32[(4)];            // 后4组

	for (int i = 0; i < cntRem; i++)
	{
		output += p[i];
	}

	return output;
}


// Scalar reference kernel: returns the largest of the first `size` elements
// of `input`.
//
// Prints a message and returns -1 if `input` is null or `size` is not
// positive, instead of reading input[0] from an empty or missing buffer.
// Note that -1 is also a possible maximum, so callers that need to tell the
// two apart must validate their arguments themselves.
float MathMax(const float *input, int size)
{
	if (!input)
	{
		printf("input data is null\n");
		return -1;
	}
	if (size <= 0)
	{
		printf("input size is invalid\n");
		return -1;
	}

	float maxVal = input[0];
	for (int i = 1; i < size; i++)
	{
		maxVal = maxVal > input[i] ? maxVal : input[i];
	}

	return maxVal;
}

// SSE maximum: returns the largest of the first `size` elements of `input`,
// comparing 4 floats per iteration and finishing the remainder with a scalar
// loop.
//
// Inputs shorter than one 4-float block are handled entirely by the scalar
// path, so no vector load ever reads past the end of the buffer. The buffer
// may have any alignment (unaligned loads/stores are used).
// Prints a message and returns -1 if `input` is null or `size` is not
// positive; -1 is also a possible maximum, so callers that need to tell the
// two apart must validate their arguments themselves.
float SSEMax(const float *input, int size)
{
	if (!input)
	{
		printf("input data is null\n");
		return -1;
	}
	if (size <= 0)
	{
		printf("input size is invalid\n");
		return -1;
	}
	const int nBlockWidth = 4; // floats per __m128 register
	const int cntBlock = size / nBlockWidth;
	const int cntRem = size % nBlockWidth;

	const float *p = input;
	float maxVal_;
	if (cntBlock > 0)
	{
		// Seed the per-lane maxima with the first block, then fold in the rest.
		__m128 maxVal = _mm_loadu_ps(p);
		p += nBlockWidth;
		for (int i = 1; i < cntBlock; i++)
		{
			maxVal = _mm_max_ps(maxVal, _mm_loadu_ps(p));
			p += nBlockWidth;
		}

		// Reduce the four lane maxima to a single value.
		float output[4];
		_mm_storeu_ps(output, maxVal);
		maxVal_ = output[0];
		for (int i = 1; i < nBlockWidth; i++)
		{
			maxVal_ = maxVal_ > output[i] ? maxVal_ : output[i];
		}
	}
	else
	{
		// Fewer than 4 elements: seed from the first one; the tail loop
		// below covers the whole input (cntRem == size here).
		maxVal_ = p[0];
	}

	// Scalar tail for the elements that did not fill a whole block.
	for (int i = 0; i < cntRem; i++)
	{
		maxVal_ = maxVal_ > p[i] ? maxVal_ : p[i];
	}

	return maxVal_;
}

float AVXMax(const float *input, int size)
{
	if (input == nullptr)
	{
		printf("input data is null\n");
		return -1;
	}
	int nBlockWidth = 8;
	int cntBlock = size / nBlockWidth;
	int cntRem = size % nBlockWidth;

	__declspec(align(32)) float output[8];
	__m256 loadData;
	const float *p = input;

	__m256 maxVal = _mm256_load_ps(p);
	p += nBlockWidth;

	for (int i = 1; i < cntBlock; i++)
	{
		loadData = _mm256_load_ps(p);
		maxVal = _mm256_max_ps(maxVal, loadData);

		p += nBlockWidth;
	}
	_mm256_store_ps(output, maxVal);

	float maxVal_ = output[0];
	for (int i = 1; i < 8; i++)
	{
		maxVal_ = maxVal_ > output[i] ? maxVal_ : output[i];
	}
	for (int i = 0; i < cntRem; i++)
	{
		maxVal_ = maxVal_ > p[i] ? maxVal_ : p[i];
	}

	return maxVal_;
}