我在 R 中有一个马尔可夫链:
set.seed(123)
n_states <- 5
matrix <- matrix(runif(n_states^2), nrow=n_states)
# set some transitions to 0
matrix[1, 4:5] <- 0
matrix[5, 1:3] <- 0
matrix[2, 5] <- 0
transition_matrix <- t(apply(matrix, 1, function(x) x/sum(x)))
rownames(transition_matrix) <- paste0("S", 1:n_states)
colnames(transition_matrix) <- paste0("S", 1:n_states)
print(round(transition_matrix, 3))
它看起来像这样:
S1 S2 S3 S4 S5
S1 0.2229340 0.03531601 0.7417500 0.00000000 0.0000000
S2 0.3910569 0.26197885 0.2248868 0.12207747 0.0000000
S3 0.1536622 0.33530265 0.2545791 0.01580275 0.2406533
S4 0.2652280 0.16563210 0.1719994 0.09849610 0.2986444
S5 0.0000000 0.00000000 0.0000000 0.59278229 0.4072177
对于固定的回合数,我想找出所有可能出现的状态序列及其对应的概率。
我尝试使用循环手动枚举所有这样的序列:
# Function to generate sequences for multiple turn lengths
find_sequences_all_turns <- function(transition_matrix, start_state = 1, max_turns = 5) {
n_states <- nrow(transition_matrix)
all_sequences <- list()
all_probabilities <- numeric()
all_turns <- numeric()
seq_counter <- 1
generate_sequence <- function(current_seq, current_prob, steps_left, total_steps) {
if(length(current_seq) > 1) {
all_sequences[[seq_counter]] <<- current_seq
all_probabilities[seq_counter] <<- current_prob
all_turns[seq_counter] <<- total_steps - steps_left
seq_counter <<- seq_counter + 1
}
if(steps_left == 0) {
return()
}
current_state <- current_seq[length(current_seq)]
possible_next_states <- which(transition_matrix[current_state,] > 0)
for(next_state in possible_next_states) {
prob <- transition_matrix[current_state, next_state]
generate_sequence(
c(current_seq, next_state),
current_prob * prob,
steps_left - 1,
total_steps
)
}
}
generate_sequence(c(start_state), 1, max_turns - 1, max_turns)
result_df <- data.frame(
turn = all_turns,
sequence_no = 1:length(all_sequences),
sequence = sapply(all_sequences, paste, collapse=""),
probability = all_probabilities
)
result_df <- result_df[order(result_df$turn, -result_df$probability),]
rownames(result_df) <- NULL
return(result_df)
}
然后我尝试调用该函数:
sequences_df <- find_sequences_all_turns(transition_matrix)
> sequences_df
turn sequence_no sequence probability
1 2 154 13 7.417500e-01
2 2 1 11 2.229340e-01
3 2 65 12 3.531601e-02
4 3 171 132 2.487108e-01
5 3 193 133 1.888341e-01
6 3 243 135 1.785046e-01
7 3 40 113 1.653613e-01
8 3 155 131 1.139789e-01
9 3 2 111 4.969955e-02
10 3 66 121 1.381057e-02
11 3 218 134 1.172169e-02
12 3 82 122 9.252048e-03
13 3 104 123 7.942105e-03
14 3 18 112 7.873137e-03
15 3 129 124 4.311289e-03
我可以做些什么来确保此代码在大量回合中运行得更快
PS:我使用此代码来验证每次所有概率的总和是否为 1:
library(dplyr)
probability_sums <- sequences_df %>%
group_by(turn) %>%
summarise(
total_probability = sum(probability),
num_sequences = n(),
check_sum_to_one = abs(total_probability - 1) < 1e-10
)
print(probability_sums)
我认为它应该能很好地满足你的目的(给定
tm <- transition_matrix
“转换矩阵”的较短变量名)你可以看到
另一个选项(可能更有效,因为导致
0
概率的行被提前过滤掉)是merge
在repeat
循环中使用,迭代地更新表使得