AreaLighting.hlsl 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. #ifndef UNITY_AREA_LIGHTING_INCLUDED
  2. #define UNITY_AREA_LIGHTING_INCLUDED
  3. #define APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
  4. #define APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
  5. // Not normalized by the factor of 1/TWO_PI.
  6. real3 ComputeEdgeFactor(real3 V1, real3 V2)
  7. {
  8. real V1oV2 = dot(V1, V2);
  9. real3 V1xV2 = cross(V1, V2);
  10. #if 0
  11. return normalize(V1xV2) * acos(V1oV2));
  12. #else
  13. // Approximate: { y = rsqrt(1.0 - V1oV2 * V1oV2) * acos(V1oV2) } on [0, 1].
  14. // Fit: HornerForm[MiniMaxApproximation[ArcCos[x]/Sqrt[1 - x^2], {x, {0, 1 - $MachineEpsilon}, 6, 0}][[2, 1]]].
  15. // Maximum relative error: 2.6855360216340534 * 10^-6. Intensities up to 1000 are artifact-free.
  16. real x = abs(V1oV2);
  17. real y = 1.5707921083647782 + x * (-0.9995697178013095 + x * (0.778026455830408 + x * (-0.6173111361273548 + x * (0.4202724111150622 + x * (-0.19452783598217288 + x * 0.04232040013661036)))));
  18. if (V1oV2 < 0)
  19. {
  20. // Undo range reduction.
  21. y = PI * rsqrt(saturate(1 - V1oV2 * V1oV2)) - y;
  22. }
  23. return V1xV2 * y;
  24. #endif
  25. }
  26. // Not normalized by the factor of 1/TWO_PI.
  27. // Ref: Improving radiosity solutions through the use of analytically determined form-factors.
  28. real IntegrateEdge(real3 V1, real3 V2)
  29. {
  30. // 'V1' and 'V2' are represented in a coordinate system with N = (0, 0, 1).
  31. return ComputeEdgeFactor(V1, V2).z;
  32. }
  33. // 'sinSqSigma' is the sine^2 of the half-angle subtended by the sphere (aperture) as seen from the shaded point.
  34. // 'cosOmega' is the cosine of the angle between the normal and the direction to the center of the light.
  35. // N.b.: this function accounts for horizon clipping.
  36. real DiffuseSphereLightIrradiance(real sinSqSigma, real cosOmega)
  37. {
  38. #ifdef APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
  39. real x = sinSqSigma;
  40. real y = cosOmega;
  41. // Use a numerical fit found in Mathematica. Mean absolute error: 0.00476944.
  42. // You can use the following Mathematica code to reproduce our results:
  43. // t = Flatten[Table[{x, y, f[x, y]}, {x, 0, 0.999999, 0.001}, {y, -0.999999, 0.999999, 0.002}], 1]
  44. // m = NonlinearModelFit[t, x * (y + e) * (0.5 + (y - e) * (a + b * x + c * x^2 + d * x^3)), {a, b, c, d, e}, {x, y}]
  45. return saturate(x * (0.9245867471551246 + y) * (0.5 + (-0.9245867471551246 + y) * (0.5359050373687144 + x * (-1.0054221851257754 + x * (1.8199061187417047 - x * 1.3172081704209504)))));
  46. #else
  47. #if 0 // Ref: Area Light Sources for Real-Time Graphics, page 4 (1996).
  48. real sinSqOmega = saturate(1 - cosOmega * cosOmega);
  49. real cosSqSigma = saturate(1 - sinSqSigma);
  50. real sinSqGamma = saturate(cosSqSigma / sinSqOmega);
  51. real cosSqGamma = saturate(1 - sinSqGamma);
  52. real sinSigma = sqrt(sinSqSigma);
  53. real sinGamma = sqrt(sinSqGamma);
  54. real cosGamma = sqrt(cosSqGamma);
  55. real sigma = asin(sinSigma);
  56. real omega = acos(cosOmega);
  57. real gamma = asin(sinGamma);
  58. if (omega >= HALF_PI + sigma)
  59. {
  60. // Full horizon occlusion (case #4).
  61. return 0;
  62. }
  63. real e = sinSqSigma * cosOmega;
  64. UNITY_BRANCH
  65. if (omega < HALF_PI - sigma)
  66. {
  67. // No horizon occlusion (case #1).
  68. return e;
  69. }
  70. else
  71. {
  72. real g = (-2 * sqrt(sinSqOmega * cosSqSigma) + sinGamma) * cosGamma + (HALF_PI - gamma);
  73. real h = cosOmega * (cosGamma * sqrt(saturate(sinSqSigma - cosSqGamma)) + sinSqSigma * asin(saturate(cosGamma / sinSigma)));
  74. if (omega < HALF_PI)
  75. {
  76. // Partial horizon occlusion (case #2).
  77. return saturate(e + INV_PI * (g - h));
  78. }
  79. else
  80. {
  81. // Partial horizon occlusion (case #3).
  82. return saturate(INV_PI * (g + h));
  83. }
  84. }
  85. #else // Ref: Moving Frostbite to Physically Based Rendering, page 47 (2015, optimized).
  86. real cosSqOmega = cosOmega * cosOmega; // y^2
  87. UNITY_BRANCH
  88. if (cosSqOmega > sinSqSigma) // (y^2)>x
  89. {
  90. return saturate(sinSqSigma * cosOmega); // Clip[x*y,{0,1}]
  91. }
  92. else
  93. {
  94. real cotSqSigma = rcp(sinSqSigma) - 1; // 1/x-1
  95. real tanSqSigma = rcp(cotSqSigma); // x/(1-x)
  96. real sinSqOmega = 1 - cosSqOmega; // 1-y^2
  97. real w = sinSqOmega * tanSqSigma; // (1-y^2)*(x/(1-x))
  98. real x = -cosOmega * rsqrt(w); // -y*Sqrt[(1/x-1)/(1-y^2)]
  99. real y = sqrt(sinSqOmega * tanSqSigma - cosSqOmega); // Sqrt[(1-y^2)*(x/(1-x))-y^2]
  100. real z = y * cotSqSigma; // Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
  101. real a = cosOmega * acos(x) - z; // y*ArcCos[-y*Sqrt[(1/x-1)/(1-y^2)]]-Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
  102. real b = atan(y); // ArcTan[Sqrt[(1-y^2)*(x/(1-x))-y^2]]
  103. return saturate(INV_PI * (a * sinSqSigma + b));
  104. }
  105. #endif
  106. #endif
  107. }
  108. // This function does not check whether light's contribution is 0.
  109. real3 PolygonFormFactor(real4x3 L)
  110. {
  111. L[0] = normalize(L[0]);
  112. L[1] = normalize(L[1]);
  113. L[2] = normalize(L[2]);
  114. L[3] = normalize(L[3]);
  115. real3 F = ComputeEdgeFactor(L[0], L[1]);
  116. F += ComputeEdgeFactor(L[1], L[2]);
  117. F += ComputeEdgeFactor(L[2], L[3]);
  118. F += ComputeEdgeFactor(L[3], L[0]);
  119. return INV_TWO_PI * F;
  120. }
  121. // See "Real-Time Area Lighting: a Journey from Research to Production", slide 102.
  122. // Turns out, despite the authors claiming that this function "calculates an approximation of
  123. // the clipped sphere form factor", that is simply not true.
  124. // First of all, above horizon, the function should then just return 'F.z', which it does not.
  125. // Secondly, if we use the correct function called DiffuseSphereLightIrradiance(), it results
  126. // in severe light leaking if the light is placed vertically behind the camera.
  127. // So this function is clearly a hack designed to work around these problems.
  128. real PolygonIrradianceFromVectorFormFactor(float3 F)
  129. {
  130. #if 1
  131. float l = length(F);
  132. return max(0, (l * l + F.z) / (l + 1));
  133. #else
  134. real sff = saturate(dot(F, F));
  135. real sinSqAperture = sqrt(sff);
  136. real cosElevationAngle = F.z * rsqrt(sff);
  137. return DiffuseSphereLightIrradiance(sinSqAperture, cosElevationAngle);
  138. #endif
  139. }
  140. // Expects non-normalized vertex positions.
  141. real PolygonIrradiance(real4x3 L)
  142. {
  143. #ifdef APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
  144. real3 F = PolygonFormFactor(L);
  145. return PolygonIrradianceFromVectorFormFactor(F);
  146. #else
  147. // 1. ClipQuadToHorizon
  148. // detect clipping config
  149. uint config = 0;
  150. if (L[0].z > 0) config += 1;
  151. if (L[1].z > 0) config += 2;
  152. if (L[2].z > 0) config += 4;
  153. if (L[3].z > 0) config += 8;
  154. // The fifth vertex for cases when clipping cuts off one corner.
  155. // Due to a compiler bug, copying L into a vector array with 5 rows
  156. // messes something up, so we need to stick with the matrix + the L4 vertex.
  157. real3 L4 = L[3];
  158. // This switch is surprisingly fast. Tried replacing it with a lookup array of vertices.
  159. // Even though that replaced the switch with just some indexing and no branches, it became
  160. // way, way slower - mem fetch stalls?
  161. // clip
  162. uint n = 0;
  163. switch (config)
  164. {
  165. case 0: // clip all
  166. break;
  167. case 1: // V1 clip V2 V3 V4
  168. n = 3;
  169. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  170. L[2] = -L[3].z * L[0] + L[0].z * L[3];
  171. break;
  172. case 2: // V2 clip V1 V3 V4
  173. n = 3;
  174. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  175. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  176. break;
  177. case 3: // V1 V2 clip V3 V4
  178. n = 4;
  179. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  180. L[3] = -L[3].z * L[0] + L[0].z * L[3];
  181. break;
  182. case 4: // V3 clip V1 V2 V4
  183. n = 3;
  184. L[0] = -L[3].z * L[2] + L[2].z * L[3];
  185. L[1] = -L[1].z * L[2] + L[2].z * L[1];
  186. break;
  187. case 5: // V1 V3 clip V2 V4: impossible
  188. break;
  189. case 6: // V2 V3 clip V1 V4
  190. n = 4;
  191. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  192. L[3] = -L[3].z * L[2] + L[2].z * L[3];
  193. break;
  194. case 7: // V1 V2 V3 clip V4
  195. n = 5;
  196. L4 = -L[3].z * L[0] + L[0].z * L[3];
  197. L[3] = -L[3].z * L[2] + L[2].z * L[3];
  198. break;
  199. case 8: // V4 clip V1 V2 V3
  200. n = 3;
  201. L[0] = -L[0].z * L[3] + L[3].z * L[0];
  202. L[1] = -L[2].z * L[3] + L[3].z * L[2];
  203. L[2] = L[3];
  204. break;
  205. case 9: // V1 V4 clip V2 V3
  206. n = 4;
  207. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  208. L[2] = -L[2].z * L[3] + L[3].z * L[2];
  209. break;
  210. case 10: // V2 V4 clip V1 V3: impossible
  211. break;
  212. case 11: // V1 V2 V4 clip V3
  213. n = 5;
  214. L[3] = -L[2].z * L[3] + L[3].z * L[2];
  215. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  216. break;
  217. case 12: // V3 V4 clip V1 V2
  218. n = 4;
  219. L[1] = -L[1].z * L[2] + L[2].z * L[1];
  220. L[0] = -L[0].z * L[3] + L[3].z * L[0];
  221. break;
  222. case 13: // V1 V3 V4 clip V2
  223. n = 5;
  224. L[3] = L[2];
  225. L[2] = -L[1].z * L[2] + L[2].z * L[1];
  226. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  227. break;
  228. case 14: // V2 V3 V4 clip V1
  229. n = 5;
  230. L4 = -L[0].z * L[3] + L[3].z * L[0];
  231. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  232. break;
  233. case 15: // V1 V2 V3 V4
  234. n = 4;
  235. break;
  236. }
  237. if (n == 0) return 0;
  238. // 2. Project onto sphere
  239. L[0] = normalize(L[0]);
  240. L[1] = normalize(L[1]);
  241. L[2] = normalize(L[2]);
  242. switch (n)
  243. {
  244. case 3:
  245. L[3] = L[0];
  246. break;
  247. case 4:
  248. L[3] = normalize(L[3]);
  249. L4 = L[0];
  250. break;
  251. case 5:
  252. L[3] = normalize(L[3]);
  253. L4 = normalize(L4);
  254. break;
  255. }
  256. // 3. Integrate
  257. real sum = 0;
  258. sum += IntegrateEdge(L[0], L[1]);
  259. sum += IntegrateEdge(L[1], L[2]);
  260. sum += IntegrateEdge(L[2], L[3]);
  261. if (n >= 4)
  262. sum += IntegrateEdge(L[3], L4);
  263. if (n == 5)
  264. sum += IntegrateEdge(L4, L[0]);
  265. sum *= INV_TWO_PI; // Normalization
  266. sum = max(sum, 0.0);
  267. return isfinite(sum) ? sum : 0.0;
  268. #endif
  269. }
  270. real LineFpo(real tLDDL, real lrcpD, real rcpD)
  271. {
  272. // Compute: ((l / d) / (d * d + l * l)) + (1.0 / (d * d)) * atan(l / d).
  273. return tLDDL + (rcpD * rcpD) * FastATan(lrcpD);
  274. }
  275. real LineFwt(real tLDDL, real l)
  276. {
  277. // Compute: l * ((l / d) / (d * d + l * l)).
  278. return l * tLDDL;
  279. }
  280. // Computes the integral of the clamped cosine over the line segment.
  281. // 'l1' and 'l2' define the integration interval.
  282. // 'tangent' is the line's tangent direction.
  283. // 'normal' is the direction orthogonal to the tangent. It is the shortest vector between
  284. // the shaded point and the line, pointing away from the shaded point.
  285. real LineIrradiance(real l1, real l2, real3 normal, real3 tangent)
  286. {
  287. real d = length(normal);
  288. real l1rcpD = l1 * rcp(d);
  289. real l2rcpD = l2 * rcp(d);
  290. real tLDDL1 = l1rcpD / (d * d + l1 * l1);
  291. real tLDDL2 = l2rcpD / (d * d + l2 * l2);
  292. real intWt = LineFwt(tLDDL2, l2) - LineFwt(tLDDL1, l1);
  293. real intP0 = LineFpo(tLDDL2, l2rcpD, rcp(d)) - LineFpo(tLDDL1, l1rcpD, rcp(d));
  294. return intP0 * normal.z + intWt * tangent.z;
  295. }
  296. // Computes 1.0 / length(mul(ortho, transpose(inverse(invM)))).
  297. real ComputeLineWidthFactor(real3x3 invM, real3 ortho)
  298. {
  299. // transpose(inverse(M)) = (1.0 / determinant(M)) * cofactor(M).
  300. // Take into account that m12 = m21 = m23 = m32 = 0 and m33 = 1.
  301. real det = invM._11 * invM._22 - invM._22 * invM._31 * invM._13;
  302. real3x3 cof = {invM._22, 0.0, -invM._22 * invM._31,
  303. 0.0, invM._11 - invM._13 * invM._31, 0.0,
  304. -invM._13 * invM._22, 0.0, invM._11 * invM._22};
  305. // 1.0 / length(mul(V, (1.0 / s * M))) = abs(s) / length(mul(V, M)).
  306. return abs(det) / length(mul(ortho, cof));
  307. }
  308. // For line lights.
  309. real LTCEvaluate(real3 P1, real3 P2, real3 B, real3x3 invM)
  310. {
  311. real result = 0.0;
  312. // Inverse-transform the endpoints.
  313. P1 = mul(P1, invM);
  314. P2 = mul(P2, invM);
  315. // Terminate the algorithm if both points are below the horizon.
  316. if (!(P1.z <= 0.0 && P2.z <= 0.0))
  317. {
  318. real width = ComputeLineWidthFactor(invM, B);
  319. if (P1.z > P2.z)
  320. {
  321. // Convention: 'P2' is above 'P1', with the tangent pointing upwards.
  322. Swap(P1, P2);
  323. }
  324. // Recompute the length and the tangent in the new coordinate system.
  325. real len = length(P2 - P1);
  326. real3 T = normalize(P2 - P1);
  327. // Clip the part of the light below the horizon.
  328. if (P1.z <= 0.0)
  329. {
  330. // P = P1 + t * T; P.z == 0.
  331. real t = -P1.z / T.z;
  332. P1 = real3(P1.xy + t * T.xy, 0.0);
  333. // Set the length of the visible part of the light.
  334. len -= t;
  335. }
  336. // Compute the normal direction to the line, s.t. it is the shortest vector
  337. // between the shaded point and the line, pointing away from the shaded point.
  338. // Can be interpreted as a point on the line, since the shaded point is at the origin.
  339. real proj = dot(P1, T);
  340. real3 P0 = P1 - proj * T;
  341. // Compute the parameterization: distances from 'P1' and 'P2' to 'P0'.
  342. real l1 = proj;
  343. real l2 = l1 + len;
  344. // Integrate the clamped cosine over the line segment.
  345. real irradiance = LineIrradiance(l1, l2, P0, T);
  346. // Guard against numerical precision issues.
  347. result = max(INV_PI * width * irradiance, 0.0);
  348. }
  349. return result;
  350. }
  351. #endif // UNITY_AREA_LIGHTING_INCLUDED