:: Probability on Finite and Discrete Set and Uniform Distribution
:: by Hiroyuki Okazaki
::
:: Received May 5, 2009
:: Copyright (c) 2009-2021 Association of Mizar Users


notation
let S be set ;
let s be FinSequence of S;
synonym whole_event s for dom S;
end;

notation
let S be set ;
let s be FinSequence of S;
let x be set ;
synonym event_pick (x,s) for Coim (S,s);
end;

definition
let S be set ;
let s be FinSequence of S;
let x be set ;
:: original: event_pick
redefine func event_pick (x,s) -> Event of (whole_event );
correctness
coherence
event_pick (,x) is Event of (whole_event )
;
by RELAT_1:132;
end;

definition
let S be finite set ;
let s be FinSequence of S;
let x be set ;
func frequency (x,s) -> Nat equals :: DIST_1:def 1
card (event_pick (x,s));
correctness
coherence
card (event_pick (x,s)) is Nat
;
;
end;

:: deftheorem defines frequency DIST_1:def 1 :
for S being finite set
for s being FinSequence of S
for x being set holds frequency (x,s) = card (event_pick (x,s));

theorem :: DIST_1:1
canceled;

::$CT
theorem :: DIST_1:2
for S being set
for s being FinSequence of S
for e being set st e in whole_event holds
ex x being Element of S st e in event_pick (x,s)
proof end;

definition
let S be finite set ;
let s be FinSequence of S;
let x be set ;
func FDprobability (x,s) -> Real equals :: DIST_1:def 2
(frequency (x,s)) / (len s);
correctness
coherence
(frequency (x,s)) / (len s) is Real
;
;
end;

:: deftheorem defines FDprobability DIST_1:def 2 :
for S being finite set
for s being FinSequence of S
for x being set holds FDprobability (x,s) = (frequency (x,s)) / (len s);

theorem :: DIST_1:3
canceled;

::$CT
theorem :: DIST_1:4
for S being finite set
for s being FinSequence of S
for x being set holds frequency (x,s) = (len s) * (FDprobability (x,s))
proof end;

definition
let S be finite set ;
let s be FinSequence of S;
func FDprobSEQ s -> FinSequence of REAL means :Def3: :: DIST_1:def 3
( dom it = Seg (card S) & ( for n being Nat st n in dom it holds
it . n = FDprobability (((canFS S) . n),s) ) );
existence
ex b1 being FinSequence of REAL st
( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = FDprobability (((canFS S) . n),s) ) )
proof end;
uniqueness
for b1, b2 being FinSequence of REAL st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = FDprobability (((canFS S) . n),s) ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds
b2 . n = FDprobability (((canFS S) . n),s) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def3 defines FDprobSEQ DIST_1:def 3 :
for S being finite set
for s being FinSequence of S
for b3 being FinSequence of REAL holds
( b3 = FDprobSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds
b3 . n = FDprobability (((canFS S) . n),s) ) ) );

theorem :: DIST_1:5
for S being finite set
for s being FinSequence of S
for x being set holds FDprobability (x,s) = prob (event_pick (x,s)) by CARD_1:62;

definition
let S be finite set ;
let s, t be FinSequence of S;
pred s,t -are_prob_equivalent means :: DIST_1:def 4
for x being set holds FDprobability (x,s) = FDprobability (x,t);
reflexivity
for s being FinSequence of S
for x being set holds FDprobability (x,s) = FDprobability (x,s)
;
symmetry
for s, t being FinSequence of S st ( for x being set holds FDprobability (x,s) = FDprobability (x,t) ) holds
for x being set holds FDprobability (x,t) = FDprobability (x,s)
;
end;

:: deftheorem defines -are_prob_equivalent DIST_1:def 4 :
for S being finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff for x being set holds FDprobability (x,s) = FDprobability (x,t) );

theorem Th4: :: DIST_1:6
for S being finite set
for s, t, u being FinSequence of S st s,t -are_prob_equivalent & t,u -are_prob_equivalent holds
s,u -are_prob_equivalent
proof end;

definition
let S be finite set ;
let s be FinSequence of S;
func Finseq-EQclass s -> Subset of (S *) equals :: DIST_1:def 5
{ t where t is FinSequence of S : s,t -are_prob_equivalent } ;
correctness
coherence
{ t where t is FinSequence of S : s,t -are_prob_equivalent } is Subset of (S *)
;
proof end;
end;

:: deftheorem defines Finseq-EQclass DIST_1:def 5 :
for S being finite set
for s being FinSequence of S holds Finseq-EQclass s = { t where t is FinSequence of S : s,t -are_prob_equivalent } ;

registration
let S be finite set ;
let s be FinSequence of S;
cluster Finseq-EQclass s -> non empty ;
coherence
not Finseq-EQclass s is empty
proof end;
end;

theorem Th5: :: DIST_1:7
for S being finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff t in Finseq-EQclass s )
proof end;

theorem Th6: :: DIST_1:8
for S being finite set
for s being FinSequence of S holds s in Finseq-EQclass s ;

theorem Th7: :: DIST_1:9
for S being finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff Finseq-EQclass s = Finseq-EQclass t )
proof end;

definition
let S be finite set ;
defpred S1[ set ] means ex u being FinSequence of S st $1 = Finseq-EQclass u;
func distribution_family S -> Subset-Family of (S *) means :Def6: :: DIST_1:def 6
for A being Subset of (S *) holds
( A in it iff ex s being FinSequence of S st A = Finseq-EQclass s );
existence
ex b1 being Subset-Family of (S *) st
for A being Subset of (S *) holds
( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s )
proof end;
uniqueness
for b1, b2 being Subset-Family of (S *) st ( for A being Subset of (S *) holds
( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) & ( for A being Subset of (S *) holds
( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def6 defines distribution_family DIST_1:def 6 :
for S being finite set
for b2 being Subset-Family of (S *) holds
( b2 = distribution_family S iff for A being Subset of (S *) holds
( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) );

registration
let S be finite set ;
cluster distribution_family S -> non empty ;
coherence
not distribution_family S is empty
proof end;
end;

theorem Th8: :: DIST_1:10
for S being finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff FDprobSEQ s = FDprobSEQ t )
proof end;

theorem :: DIST_1:11
for S being finite set
for s, t being FinSequence of S st t in Finseq-EQclass s holds
FDprobSEQ s = FDprobSEQ t
proof end;

definition
let S be finite set ;
func GenProbSEQ S -> Function of (distribution_family S),(REAL *) means :Def7: :: DIST_1:def 7
for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & it . x = FDprobSEQ s );
existence
ex b1 being Function of (distribution_family S),(REAL *) st
for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b1 . x = FDprobSEQ s )
proof end;
uniqueness
for b1, b2 being Function of (distribution_family S),(REAL *) st ( for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b1 . x = FDprobSEQ s ) ) & ( for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b2 . x = FDprobSEQ s ) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def7 defines GenProbSEQ DIST_1:def 7 :
for S being finite set
for b2 being Function of (distribution_family S),(REAL *) holds
( b2 = GenProbSEQ S iff for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b2 . x = FDprobSEQ s ) );

theorem Th10: :: DIST_1:12
for S being finite set
for s being FinSequence of S holds (GenProbSEQ S) . (Finseq-EQclass s) = FDprobSEQ s
proof end;

registration
let S be finite set ;
cluster GenProbSEQ S -> one-to-one ;
coherence
GenProbSEQ S is one-to-one
proof end;
end;

definition
let S be finite set ;
let p be ProbFinS FinSequence of REAL ;
assume A1: ex s being FinSequence of S st FDprobSEQ s = p ;
func distribution (p,S) -> Element of distribution_family S means :Def8: :: DIST_1:def 8
(GenProbSEQ S) . it = p;
existence
ex b1 being Element of distribution_family S st (GenProbSEQ S) . b1 = p
proof end;
uniqueness
for b1, b2 being Element of distribution_family S st (GenProbSEQ S) . b1 = p & (GenProbSEQ S) . b2 = p holds
b1 = b2
proof end;
end;

:: deftheorem Def8 defines distribution DIST_1:def 8 :
for S being finite set
for p being ProbFinS FinSequence of REAL st ex s being FinSequence of S st FDprobSEQ s = p holds
for b3 being Element of distribution_family S holds
( b3 = distribution (p,S) iff (GenProbSEQ S) . b3 = p );

definition
let S be finite set ;
let s be FinSequence of S;
func freqSEQ s -> FinSequence of NAT means :Def9: :: DIST_1:def 9
( dom it = Seg (card S) & ( for n being Nat st n in dom it holds
it . n = (len s) * ((FDprobSEQ s) . n) ) );
existence
ex b1 being FinSequence of NAT st
( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = (len s) * ((FDprobSEQ s) . n) ) )
proof end;
uniqueness
for b1, b2 being FinSequence of NAT st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = (len s) * ((FDprobSEQ s) . n) ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds
b2 . n = (len s) * ((FDprobSEQ s) . n) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def9 defines freqSEQ DIST_1:def 9 :
for S being finite set
for s being FinSequence of S
for b3 being FinSequence of NAT holds
( b3 = freqSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds
b3 . n = (len s) * ((FDprobSEQ s) . n) ) ) );

theorem Th11: :: DIST_1:13
for S being non empty finite set
for s being non empty FinSequence of S
for n being Nat st n in Seg (card S) holds
ex x being Element of S st
( (freqSEQ s) . n = frequency (x,s) & x = (canFS S) . n )
proof end;

theorem Th12: :: DIST_1:14
for S being finite set
for s being FinSequence of S holds freqSEQ s = (len s) * (FDprobSEQ s)
proof end;

theorem Th13: :: DIST_1:15
for S being finite set
for s being FinSequence of S holds Sum (freqSEQ s) = (len s) * (Sum (FDprobSEQ s))
proof end;

theorem :: DIST_1:16
for S being non empty finite set
for s being non empty FinSequence of S
for n being Nat st n in dom s holds
ex m being Nat st
( (freqSEQ s) . m = frequency ((s . n),s) & s . n = (canFS S) . m )
proof end;

Lm1: for S being Function
for X being set st S is disjoint_valued holds
S | X is disjoint_valued

proof end;

Lm2: for n being Nat
for S being Function
for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & n = len L & ( for i being Nat st i in dom S holds
( S . i is finite & L . i = card (S . i) ) ) holds
( Union S is finite & card (Union S) = Sum L )

proof end;

theorem Th15: :: DIST_1:17
for S being Function
for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & ( for i being Nat st i in dom S holds
( S . i is finite & L . i = card (S . i) ) ) holds
( Union S is finite & card (Union S) = Sum L )
proof end;

theorem Th16: :: DIST_1:18
for S being non empty finite set
for s being non empty FinSequence of S holds Sum (freqSEQ s) = len s
proof end;

theorem Th17: :: DIST_1:19
for S being non empty finite set
for s being non empty FinSequence of S holds Sum (FDprobSEQ s) = 1
proof end;

registration
let S be non empty finite set ;
let s be non empty FinSequence of S;
cluster FDprobSEQ s -> ProbFinS ;
coherence
FDprobSEQ s is ProbFinS
proof end;
end;

definition
let S be non empty finite set ;
mode distProbFinS of S -> ProbFinS FinSequence of REAL means :Def10: :: DIST_1:def 10
( len it = card S & ex s being FinSequence of S st FDprobSEQ s = it );
existence
ex b1 being ProbFinS FinSequence of REAL st
( len b1 = card S & ex s being FinSequence of S st FDprobSEQ s = b1 )
proof end;
end;

:: deftheorem Def10 defines distProbFinS DIST_1:def 10 :
for S being non empty finite set
for b2 being ProbFinS FinSequence of REAL holds
( b2 is distProbFinS of S iff ( len b2 = card S & ex s being FinSequence of S st FDprobSEQ s = b2 ) );

theorem Th18: :: DIST_1:20
for S being non empty finite set
for p being distProbFinS of S holds
( distribution (p,S) is Element of distribution_family S & (GenProbSEQ S) . (distribution (p,S)) = p )
proof end;

definition
let S be finite set ;
let s be FinSequence of S;
attr s is uniformly_distributed means :: DIST_1:def 11
for n being Nat st n in dom (FDprobSEQ s) holds
(FDprobSEQ s) . n = 1 / (card S);
end;

:: deftheorem defines uniformly_distributed DIST_1:def 11 :
for S being finite set
for s being FinSequence of S holds
( s is uniformly_distributed iff for n being Nat st n in dom (FDprobSEQ s) holds
(FDprobSEQ s) . n = 1 / (card S) );

theorem Th19: :: DIST_1:21
for S being finite set
for s being FinSequence of S st s is uniformly_distributed holds
FDprobSEQ s is constant
proof end;

theorem Th20: :: DIST_1:22
for S being finite set
for s, t being FinSequence of S st s is uniformly_distributed & s,t -are_prob_equivalent holds
t is uniformly_distributed
proof end;

theorem Th21: :: DIST_1:23
for S being finite set
for s, t being FinSequence of S st s is uniformly_distributed & t is uniformly_distributed holds
s,t -are_prob_equivalent
proof end;

registration
let S be finite set ;
cluster canFS S -> uniformly_distributed for FinSequence of S;
coherence
for b1 being FinSequence of S st b1 = canFS S holds
b1 is uniformly_distributed
proof end;
end;

Lm3: for S being finite set
for s being FinSequence of S st s in Finseq-EQclass (canFS S) holds
s is uniformly_distributed

by Th5, Th20;

Lm4: for S being finite set
for s being FinSequence of S st s is uniformly_distributed holds
for t being FinSequence of S st t is uniformly_distributed holds
t in Finseq-EQclass s

proof end;

definition
let S be finite set ;
func uniform_distribution S -> Element of distribution_family S means :Def12: :: DIST_1:def 12
for s being FinSequence of S holds
( s in it iff s is uniformly_distributed );
existence
ex b1 being Element of distribution_family S st
for s being FinSequence of S holds
( s in b1 iff s is uniformly_distributed )
proof end;
uniqueness
for b1, b2 being Element of distribution_family S st ( for s being FinSequence of S holds
( s in b1 iff s is uniformly_distributed ) ) & ( for s being FinSequence of S holds
( s in b2 iff s is uniformly_distributed ) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def12 defines uniform_distribution DIST_1:def 12 :
for S being finite set
for b2 being Element of distribution_family S holds
( b2 = uniform_distribution S iff for s being FinSequence of S holds
( s in b2 iff s is uniformly_distributed ) );

registration
let S be non empty finite set ;
cluster Relation-like NAT -defined REAL -valued non empty Function-like constant finite V47() V48() V49() FinSequence-like FinSubsequence-like V77() ProbFinS for distProbFinS of S;
existence
ex b1 being distProbFinS of S st b1 is constant
proof end;
end;

definition
let S be non empty finite set ;
func Uniform_FDprobSEQ S -> distProbFinS of S equals :: DIST_1:def 13
FDprobSEQ (canFS S);
coherence
FDprobSEQ (canFS S) is distProbFinS of S
proof end;
end;

:: deftheorem defines Uniform_FDprobSEQ DIST_1:def 13 :
for S being non empty finite set holds Uniform_FDprobSEQ S = FDprobSEQ (canFS S);

registration
let S be non empty finite set ;
cluster Uniform_FDprobSEQ S -> constant ;
coherence
Uniform_FDprobSEQ S is constant
by Th19;
end;

theorem :: DIST_1:24
for S being non empty finite set holds uniform_distribution S = distribution ((Uniform_FDprobSEQ S),S)
proof end;