NumPy随机数生成函数的多种实现方法

1. 传统方法（legacy API）

注意：从 NumPy 1.17 开始，官方推荐使用新的 Generator API（np.random.default_rng）；旧的 np.random.* 函数仍可使用，但已被冻结，不再增加新功能。

import numpy as np

# 1.1 Draw from the module-level (global) random state
np.random.seed(42)  # seed the global state so the draws below are repeatable

# One sample from each of several distributions; the draw order is kept
# because every call advances the same shared global stream.
random_float = np.random.rand()                                # uniform on [0, 1), scalar
random_array = np.random.rand(3, 4)                            # uniform on [0, 1), 3x4 array
random_uniform = np.random.uniform(low=1, high=10, size=5)     # uniform on [1, 10)
random_normal = np.random.normal(loc=0, scale=1, size=10)      # standard normal
random_integers = np.random.randint(low=0, high=100, size=10)  # integers in [0, 100)
random_choice = np.random.choice(a=[1, 2, 3, 4, 5], size=10)   # pick from the given values

2. 新版推荐方法（Generator API）

NumPy 1.17 及以上版本推荐通过 np.random.default_rng() 创建 np.random.Generator 实例来生成随机数。

import numpy as np

# 2.1 Build Generator instances
from numpy.random import MT19937, PCG64

# Recommended entry point: default_rng picks the default BitGenerator.
rng = np.random.default_rng(42)

# Alternatively, wrap an explicitly chosen BitGenerator in a Generator.
rng_pcg = np.random.Generator(PCG64(42))
rng_mt = np.random.Generator(MT19937(42))

3. 常用随机数生成方法

3.1 基础随机数

# Build the Generator used by the draws below
rng = np.random.default_rng(42)

# Uniform on [0, 1)
print(rng.random())                 # single value
print(rng.random(size=(3, 4)))      # 3x4 array

# Uniform on [low, high)
print(rng.uniform(low=1, high=10, size=5))

# Normal distribution
print(rng.standard_normal(size=10))
print(rng.normal(loc=0, scale=1, size=10))  # explicit mean and standard deviation

# Integers
print(rng.integers(low=0, high=100, size=10))
print(rng.integers(low=0, high=100, size=(3, 4), endpoint=True))  # upper bound included

3.2 其他概率分布

# Binomial distribution
print(rng.binomial(n=10, p=0.5, size=100))

# Poisson distribution
print(rng.poisson(lam=5, size=10))

# Exponential distribution
print(rng.exponential(scale=1.0, size=10))

# Chi-square distribution
print(rng.chisquare(df=2, size=10))

# Beta distribution
print(rng.beta(a=2, b=5, size=10))

# Gamma distribution
print(rng.gamma(shape=2, scale=2, size=10))

4. 随机抽样和排列

# Random selection
arr = np.arange(10)
# Generator.choice samples WITH replacement by default, so sampling without
# replacement has to be requested explicitly.
print(rng.choice(arr, size=5, replace=False))  # without replacement
print(rng.choice(arr, size=5, replace=True))   # with replacement
print(rng.choice(arr, size=5, p=[0.1]*10))     # explicit per-element probabilities

# Random permutation
arr = np.arange(10)
rng.shuffle(arr)  # shuffles the array in place
print(arr)

permuted = rng.permutation(10)  # returns a new array
print(permuted)

# Sampling (advanced)
samples = rng.choice(20, size=10, replace=False)  # draws without repeats
print(samples)

5. 特殊随机数生成

# Multivariate normal distribution
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]]
print(rng.multivariate_normal(mean, cov, size=100))

# Dirichlet distribution
print(rng.dirichlet(alpha=[1, 2, 3], size=5))

# Random bytes
print(rng.bytes(10))

6. 可重复性与并行处理

# 6.1 Save and restore state
rng = np.random.default_rng(seed=42)
# Read the state through the public ``bit_generator.state`` property.
# ``Generator.__getstate__`` is a pickling hook, not a documented way to
# snapshot state, and its return value is not stable across NumPy releases.
state = rng.bit_generator.state  # dict snapshot of the current state

# Restore the state into another Generator (it must use the same
# BitGenerator type; both come from default_rng here).
rng_restored = np.random.default_rng()
rng_restored.bit_generator.state = state

# 6.2 Random numbers for parallel work
# Spawn four child seeds from one root SeedSequence, then build one
# Generator per worker process from them.
seeds = np.random.SeedSequence(42).spawn(4)
generators = list(map(np.random.default_rng, seeds))

7. 性能对比

import time

# Legacy API: draws go through the global random state.
# time.perf_counter() is used for interval timing: it is monotonic and
# high-resolution, whereas time.time() follows the wall clock and can jump.
start = time.perf_counter()
for _ in range(10000):
    np.random.rand(1000)
print(f"传统方法: {time.perf_counter() - start:.4f}秒")

# Generator API: the Generator is created outside the timed region so only
# the draws themselves are measured.
rng = np.random.default_rng()
start = time.perf_counter()
for _ in range(10000):
    rng.random(1000)
print(f"新方法: {time.perf_counter() - start:.4f}秒")

8. 实际应用示例

# Example 1: Monte Carlo simulation
def monte_carlo_pi(num_samples=1000000, seed=None):
    """Estimate pi by sampling points uniformly in the square [-1, 1) x [-1, 1).

    The fraction of points that land inside the unit circle approximates
    pi / 4, so the estimate is four times that fraction.

    Args:
        num_samples: Number of random points to draw.
        seed: Anything accepted by ``np.random.default_rng`` (int,
            SeedSequence, Generator, ...). ``None`` (the default) seeds from
            OS entropy, so each call gives a different estimate; pass a
            value to make the estimate reproducible.

    Returns:
        The estimate of pi as a NumPy float.
    """
    rng = np.random.default_rng(seed)
    x = rng.uniform(-1, 1, num_samples)
    y = rng.uniform(-1, 1, num_samples)
    inside_circle = (x**2 + y**2) <= 1
    # The mean of a boolean array is the fraction of True entries.
    return 4 * np.mean(inside_circle)


print(f"π ≈ {monte_carlo_pi()}")

# Example 2: random noise for data augmentation
def add_random_noise(data, noise_level=0.1, random_state=42):
    """Return ``data`` plus zero-mean Gaussian noise of the same shape.

    Args:
        data: Array-like input; converted with ``np.asanyarray`` so lists
            and scalars are accepted as well as arrays.
        noise_level: Standard deviation of the noise.
        random_state: Anything accepted by ``np.random.default_rng``. The
            default ``42`` builds a fresh, identically seeded generator on
            every call, so repeated calls add the *same* noise. For real
            augmentation pass ``None`` (fresh entropy) or a shared
            ``np.random.Generator`` so each call gets different noise.

    Returns:
        A new array ``data + noise``; the input is not modified.
    """
    data = np.asanyarray(data)
    # default_rng returns an existing Generator unchanged, so a shared
    # generator keeps advancing across calls.
    rng = np.random.default_rng(random_state)
    noise = rng.normal(0, noise_level, data.shape)
    return data + noise

# Example 3: random train/test split
def train_test_split_random(X, y, test_size=0.2, random_state=42):
    """Randomly split ``X`` and ``y`` into train and test subsets.

    Both inputs are shuffled with the same permutation, so row ``i`` of
    each output ``X`` part still corresponds to row ``i`` of the matching
    ``y`` part.

    Args:
        X: Array-like of samples, indexed along the first axis.
        y: Array-like of targets with the same length as ``X``.
        test_size: Fraction of samples (between 0 and 1) put in the test
            set; the count is truncated with ``int``.
        random_state: Seed (or anything accepted by
            ``np.random.default_rng``) controlling the shuffle.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of arrays.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``test_size``
            is outside ``[0, 1]``.
    """
    # Integer-array indexing below needs arrays; plain lists would fail.
    X = np.asanyarray(X)
    y = np.asanyarray(y)
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    rng = np.random.default_rng(random_state)
    indices = rng.permutation(n_samples)
    test_count = int(n_samples * test_size)
    # The first test_count shuffled indices form the test set, the rest train.
    test_idx, train_idx = indices[:test_count], indices[test_count:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]