FUNCTION TWOLINE,U,V,WX,WY
;+
; Part of FITXYERRS.
;-
EPS=1.0E-20
N=N_ELEMENTS(U)
IF WY GT WX THEN BEGIN
S=SORT(U)
TX=U(S) & TY=V(S)
X1=MED(TX(0:.5*N)) & X2=MED(TX(.5*N+1:N-1))
Y1=MED(TY(0:.5*N)) & Y2=MED(TY(.5*N+1:N-1))
IF ABS(X1-X2) LT EPS THEN BEGIN
PRINT,'FITXYERRS: Initial Fit Failed. Range of X = 0.'
RETURN,0.
ENDIF
SLOP=(Y1-Y2)/(X1-X2)
YINT=Y1-SLOP*X1
ENDIF ELSE BEGIN ; X on Y
S=SORT(V)
TX=V(S) & TY=U(S)
X1=MED(TX(0:.5*N)) & X2=MED(TX(.5*N+1:N-1))
Y1=MED(TY(0:.5*N)) & Y2=MED(TY(.5*N+1:N-1))
IF ABS(X1-X2) LT EPS THEN BEGIN
PRINT,'FITXYERRS: Initial Fit Failed. Range of Y = 0.'
RETURN,0.
ENDIF
TSLOP=(Y1-Y2)/(X1-X2)
TYINT=Y1-TSLOP*X1
IF ABS(TSLOP) LT EPS THEN TSLOP=EPS
SLOP=1./TSLOP
YINT=-TYINT/TSLOP
ENDELSE
RETURN,[YINT,SLOP]
END
FUNCTION FITXYERRS, Xin,Yin,EXin,EYin, N_SAMPLE,YinT,SLOP,SYinT,SSLOP,$
NONROBUST=FITTYPE, TOLERANCE=SMALLENUF
;
;+
; NAME:
; FITXYERRS
;
; PURPOSE:
; Linear fit to data in which there are errors in both Y and X, and
; possibly outliers or points with true errors much larger than the
; measurement errors. It is outlier-resistant but not as resistant as
; ROBUST_LINEFIT. Uncertainties are calculated using the bootstrap
; method.
;
; CALLING SEQUENCE:
; Coeff = FITXYERRS( X, Y, Ex, Ey, N_sample, [ Yint, Slope, Sig_Yint,
; Sig_Slope, /NONROBUST, Tolerance = ] )
;
; INPUT ARGUMENT:
; X = Input x vector
; Y = Input y vector
; Ex= vector of X measurement errors
; Ey= the vector of Y measurement errors
; N_SAMPLE = the number of bootstrap samples. 50 to 100 usually sufficient
; If N_SAMPLE = 1, standard errors are not returned.
;
; OPTIONAL INPUT KEYWORDS:
; NONROBUST - If set, "robustness" weights are not calculated.
; TOLERANCE - When the scatter about the fitted line, weighted by
; measurement errors, changes by less than this fraction,
; the fit is considered to have converged. Default=.0001
;
; OUTPUT:
; COEFF = array of coefficients. Dimensions=(NDEG+1,N_SAMPLE)
; First dimension in the order A0, A1
;
; OPTIONAL OUTPUT ARGUMENT:
; Yint = Y intercept (average of COEFF(0,*))
; Slop = slope (average of (COEFF(1,*))
; Sig_Yint = standard error of Y intercept (std. dev. of COEFF(0,*))
; Sig_Slop = standard error of slope (std. dev. of COEFF(1,*))
;
; METHOD:
; Convert to standardized variables, calculate Y vs X and X vs Y fits,
; take a weighted mean of the two lines, the weights being a function of
; slope. The weights of the input points are functions of their
; uncertainties and perpendicular distance to the fitted line. This is
; an iterative procedure. To the author's knowledge, neither this nor
; a similar method has ever been, or may ever be, published.
;
; WARNING:
; At least 6 points must be input. It is better to have at least 10.
; It is best to have a large number.
; For large datasets, try it out with N_SAMPLE of 1 first to see how long
; it takes.
;
; SUBROUTINES CALLED:
; XYFIT, MED (to calculate a median)
;
; REVSION HISTORY:
; Written, H.T. Freudenreich, HSTX, 6/94.
; Corrected serious bug in computation of the Y intercept
; H.T. Freudenreich/Landsman HSTX, 11/95
;
;-
ON_ERROR,2
ITMAX = 20
EPS = 1.0E-22
N=N_ELEMENTS(Xin)
IF N LT 6 THEN BEGIN
PRINT,'FITXYERRS: Too few points! Returning 0'
RETURN,0.
ENDIF
IF KEYWORD_SET(SMALLENUF) THEN CONVERGENCE=SMALLENUF ELSE CONVERGENCE=.0001
; Shift the coordinate system:
X0=TOTAL(Xin)/N & Y0=TOTAL(Yin)/N
X=Xin-X0 & Y=Yin-Y0
EX=EXin & EY=EYin
R0=SYSTIME(1)-DOUBLE(7.) ;=random number seed
; Now convert to carefree variables by dividing by the median measurement
; errors. (If the errors are heteroscedastic, they are not quite as carefree.)
XERR = MED(EX)+EPS & YERR = MED(EY)+EPS
X=X/XERR & Y=Y/YERR
EX=EX/XERR & EY=EY/YERR
; Now the errors are roughly the same (exactly, if the variances are fixed).
; The slope will determine the relative weights of the Y|X and X|Y regressions.
; Initially, we don't know the slope, and so we give the most weight to the
; variable with the largest range.
AX=MED(ABS(X)) & AY=MED(ABS(Y))
WX=AY^2/(AX^2+AY^2) & WY=1.-WX ; = initial weights of X|Y and Y|X fits.
; Now perform the calculation N_SAMPLE times.
COEFF=FLTARR(2,N_SAMPLE)
NGOOD = -1
; Get some randomn number seeds:
R0=SYSTIME(1)*2.+1.
SEEDS=RANDOMU(R0,N_SAMPLE)*1.0E6+1.
FOR L=0,N_SAMPLE-1 DO BEGIN
IF L EQ 0 THEN BEGIN
; First time, use the actual data:
U=X & V=Y & Eu=Ex & Ev=Ey
ENDIF ELSE BEGIN
R=SEEDS(L)
; Uniform random numbers between 0 and N-1:
PICK=RANDOMU(R,N)
R=LONG(PICK*N)
U=X(R) & V=Y(R) & Eu=Ex(R) & Ev=Ey(R)
ENDELSE
; Fit Y vs X and X vs Y, iteratively:
NUMIT=0
MIDDIST=1.0E37
; For the initial fit, divide the data into 2 groups and calculate the line
; through them.
CC=TWOLINE(U,V,WX,WY)
; The "robustness" weights are functions of distance from the fitted line,
; so calculate that:
R=SQRT(1.+CC(1)^2) & IF CC(0) GT 0. THEN R=-R
U1=CC(1)/R & U2=-1./R & U3=CC(0)/R
DIST=U1*U+U2*V+U3 ; = orthog. distance to line
; Now iterate:
FITAGAIN:
NUMIT=NUMIT+1
IF N_ELEMENTS(CC) EQ 2 THEN BEGIN
; The weights will be a product of a function of the measurement errors
; and, for outlier-resistance, a function of orthogonal distance to the line.
; Measurement-error weights: For the Y on X fit, the weights should be
; 1/(Ey^2+b^2 Ex^2), where b is the slope of Y vs X. For X on Y:
; 1/(Ex^2+Ey^2/b^2). Since we are doing both, and want to treat X and Y
; symmetrically, take the weighted sum. The weights are functions of
; slope.
SLOP=CC(1)^2 > 1.0E-18 ; slope^2
DY= Ev^2 + CC(1)^2*Eu^2 + eps & DX= Eu^2 + Ev^2/SLOP + eps
; The relative weights of the Y|X and X|Y fits:
FY = 1./(1.+CC(1)^2) & FX = 1.-FY
; Now the "measurement error" weights:
WS= 1./DY * FY + 1./DX * FX
; Note:
; (When slope==>0, WS==>1/Ey^2 (Ex is irrelevant)
; When slope==>oo, WS==>1/Ex^2 (Ey is irrelevant)
; When slope==>1, WS==>1/(Ex^2+Ey^2) (X and Y indistinguishable))
IF KEYWORD_SET(FITTYPE) THEN BEGIN
; Don't need to calculate robustness weights or check for convergence.
; Fit once and move on to the next set, if there is one.
W=WS/TOTAL(WS)
CC=XYFIT(U,V,W,CCX,CCY,DIST,FX,FY)
GOTO, DONE
ENDIF
; From this calculate an "adjusted" distance (distance/measurement error)
; to be used in the robustness weights.
DIST=ABS(DIST)/SQRT( DY*FY + DX*FX ) ; = adjusted distance
DX=0 & DY=0
; Points < 1 *(mid-averaged adjusted distance from the fit line) are
; given equal distance weights. Beyond 1 unit of distance the weights
; decrease as 1/distance^2. Beyond 4 units, weights are set to 0.0.
;
; Why the mid-averaged distance? It is more sensitive than the median but
; has a lower breakdown point: 25% vs 50%.
D=DIST(SORT(DIST))
OLDMIDDIST=MIDDIST
MIDDIST=AVG(D(.25*N:.75*N))/.70 ; = 1 sigma if Gaussian
IF MIDDIST LT EPS THEN BEGIN
PRINT,'FITXYERRS: Weird data. Fit attempt failed'
CC=0.
GOTO,DONE
ENDIF
Q=WHERE(DIST LT MIDDIST,COUNT) & IF COUNT GT 0 THEN DIST(Q)=MIDDIST
W=WS/DIST^2
Q=WHERE(DIST GT (4.*MIDDIST),COUNT) & IF COUNT GT 0 THEN W(Q)=0.
SUMW = TOTAL(W)
IF SUMW EQ 0. THEN BEGIN
PRINT,'FITXYERRS: Weird data. Fit attempt failed'
CC=0.
GOTO,DONE
ENDIF
W=W/SUMW
IF NUMIT GT 1 THEN BEGIN
OLDCC=CC
DIFF=ABS(OLDMIDDIST-MIDDIST)/MIDDIST
IF DIFF LT CONVERGENCE THEN GOTO,DONE
IF MIDDIST GT OLDMIDDIST THEN BEGIN
CC=OLDCC
GOTO,DONE
ENDIF
ENDIF
IF NUMIT EQ ITMAX THEN GOTO,DONE
IF MIDDIST LT EPS THEN GOTO,DONE
; Fit again:
OLDCC=CC
CC=XYFIT(U,V,W,CCX,CCY,DIST,FX,FY)
GOTO,FITAGAIN
ENDIF
DONE:
; If the fit was successful, store the coefficients:
IF (N_ELEMENTS(CC) EQ 2) THEN BEGIN
NGOOD=NGOOD+1
COEFF(*,NGOOD)=CC
ENDIF
ENDFOR
NGOOD = NGOOD+1
IF NGOOD LT N_SAMPLE THEN BEGIN
PRINT,'FITXYERRS: Some samples rejected.'
COEFF=COEFF(*,0:NGOOD-1)
ENDIF
; Take out the scale factors:
COEFF(0,*)=COEFF(0,*)*YERR
COEFF(1,*)=COEFF(1,*)*YERR/XERR
; Shift X and Y back to the original coordinate system and calculate the
; y-intercept, slope and their standard errors.
SLOP = TOTAL(COEFF(1,*))/NGOOD
COEFF(0,*) = COEFF(0,*) + Y0 - COEFF(1,*)*X0 ;Bug corrected 11/95
YINT = TOTAL(COEFF(0,*))/NGOOD
IF NGOOD GT 2 THEN BEGIN
SYINT = STDEV(COEFF(0,*))
SSLOP = STDEV(COEFF(1,*))
ENDIF ELSE BEGIN
SYINT=0.
SSLOP=0.
ENDELSE
RETURN,COEFF
END