You have to use 2d array to have 2 min costs from s to t with 2 states 0 (up) and 1 (down) (dis[u][0], dis[u][1]), if you just you 1d array and AC, it must be lucky.
This is my AC code run in 0.031s:
((const) inf = 1e9,
(define) ep = emplace (= push/insert),
(define) eb = emplace_back (= push_back),
ep and eb both are faster than push and push_back,
emplace_front/emplace/emplace_back = push_front/(push/insert)/push_back but faster because it doesn't copy then moves value into array, it pushes directly into array)
#pragma GCC optimize( "Ofast" )
#pragma GCC target( "avx2", "sse4.2", "abm", "bmi", "bmi2", "fma", "mmx", "popcnt", "lzcnt" )
#include <bits/stdc++.h>
#define endl '\n'
#define inf 1e18
#define int long long
#define pii pair <int, int>
#define vii vector <pii>
#define tiii tuple <int, int, int>
#define viii vector <tiii>
#define ep emplace
#define eb emplace_back
using namespace std;
const int MaxN = 1e4 + 10;
int n, m, s, t, f[MaxN][2];
vii a[MaxN];
priority_queue <tiii, viii, greater <tiii>> pq;
signed main()
{
cin.tie(0) -> sync_with_stdio(0);
__builtin_ia32_ldmxcsr ( 40896 );
fill(&f[0][0], &f[0][0] + sizeof(f) / sizeof(f[0][0]), inf);
cin >> n >> m;
for (int u, v, i = 1; i <= m; ++i)
{
cin >> u >> v;
a[u].eb(v, 0);
a[v].eb(u, 1);
}
cin >> s >> t;
f[s][0] = f[s][1] = 0;
pq.ep(f[s][0], s, 0);
pq.ep(f[s][1], s, 1);
while (!pq.empty())
{
auto [x, u, y] = pq.top(); pq.pop();
if (x > f[u][y]) continue;
if (u == t) break;
for (auto [v, z] : a[u])
{
int w = f[u][y] + (y != z);
if (f[v][z] > w)
pq.ep(f[v][z] = w, v, z);
}
}
cout << min(f[t][0], f[t][1]) << endl;
}
if you have any question, you can ask me :))