Cache Digest Diffs, first results

From: Henrik Nordstrom <hno@dont-contact.us>
Date: Wed, 11 Aug 1999 05:10:45 +0200

I have now completed an initial round of simulations of "Huffman encoded
bit change distance".

Initial results are:
%bits changed %improvement from cd_diff
0.50% 39%
0.60% 41%
1.05% 46%
1.70% 50%
3.02% 54%
5.00% 57%
7.02% 58%
10.00% 58%

Known limitations of this initial version:
* Can't handle "no changes". It will give false results if changes are
further apart than 2^16 bits.
* It is a simulator, and I have not yet verified that encoding+decoding
gives correct output. It should, but I cannot make any guarantees until
I have actually written and tested a complete encoder+decoder.

How to read the attached results:

1: % of bits changed in the digests
2: program name, where bitoffset is results from this program and
cd_diff is similar results from the NLANR cd_diff.c program.
3: digest time stamps from the Date headers ddhhmm-ddhhmm (day hour
minute).
4: resulting output size
5: % of the original size
6: "reduction factor"

The digest size in this run is 866590 bytes.

The bit change variation shown in the report is generated by running on
a permutation of collected digests (see time stamps). This is to show
how the program behaves when the amount of changes climbs up.

The attached C program is provided for your own amusement only. It is
work in progress and not intended to be distributed outside squid-dev. I
am not very proud of the code, it is merely a proof of concept.

/Henrik

    [ Part 2: "Attached Text" ]

 0.417%: bitoffset 250907-251007 = 36262 ( 4.18% : x 23.90)
 0.417%: cd_diff 250907-251007 = 57002 ( 6.58% : x 15.20)
 0.501%: bitoffset 251208-251308 = 41635 ( 4.80% : x 20.81)
 0.501%: cd_diff 251208-251308 = 68362 ( 7.89% : x 12.68)
 0.526%: bitoffset 241259-241400 = 43319 ( 5.00% : x 20.00)
 0.526%: cd_diff 241259-241400 = 71672 ( 8.27% : x 12.09)
 0.534%: bitoffset 242203-242303 = 43743 ( 5.05% : x 19.81)
 0.534%: cd_diff 242203-242303 = 72492 ( 8.37% : x 11.95)
 0.544%: bitoffset 250204-250305 = 44386 ( 5.12% : x 19.52)
 0.544%: cd_diff 250204-250305 = 73990 ( 8.54% : x 11.71)
 0.553%: bitoffset 251308-251409 = 44994 ( 5.19% : x 19.26)
 0.553%: cd_diff 251308-251409 = 75114 ( 8.67% : x 11.54)
 0.557%: bitoffset 250104-250204 = 45201 ( 5.22% : x 19.17)
 0.557%: cd_diff 250104-250204 = 75734 ( 8.74% : x 11.44)
 0.574%: bitoffset 242303-250004 = 46323 ( 5.35% : x 18.71)
 0.574%: cd_diff 242303-250004 = 77946 ( 8.99% : x 11.12)
 0.578%: bitoffset 241400-241500 = 46543 ( 5.37% : x 18.62)
 0.578%: cd_diff 241400-241500 = 78458 ( 9.05% : x 11.05)
 0.583%: bitoffset 250606-250706 = 46876 ( 5.41% : x 18.49)
 0.583%: cd_diff 250606-250706 = 79120 ( 9.13% : x 10.95)
 0.600%: bitoffset 250004-250104 = 47898 ( 5.53% : x 18.09)
 0.600%: cd_diff 250004-250104 = 81486 ( 9.40% : x 10.63)
 0.601%: bitoffset 250505-250606 = 48022 ( 5.54% : x 18.05)
 0.601%: cd_diff 250505-250606 = 81632 ( 9.42% : x 10.62)
 0.639%: bitoffset 241500-241600 = 50360 ( 5.81% : x 17.21)
 0.639%: cd_diff 241500-241600 = 86534 ( 9.99% : x 10.01)
 0.652%: bitoffset 241600-241701 = 51089 ( 5.90% : x 16.96)
 0.652%: cd_diff 241600-241701 = 88226 ( 10.18% : x 9.82)
 0.654%: bitoffset 250305-250405 = 51248 ( 5.91% : x 16.91)
 0.654%: cd_diff 250305-250405 = 88652 ( 10.23% : x 9.78)
 0.659%: bitoffset 250405-250505 = 51593 ( 5.95% : x 16.80)
 0.659%: cd_diff 250405-250505 = 89228 ( 10.30% : x 9.71)
 0.695%: bitoffset 242102-242203 = 53801 ( 6.21% : x 16.11)
 0.695%: cd_diff 242102-242203 = 94112 ( 10.86% : x 9.21)
 0.719%: bitoffset 242002-242102 = 55214 ( 6.37% : x 15.70)
 0.719%: cd_diff 242002-242102 = 97246 ( 11.22% : x 8.91)
 0.878%: bitoffset 251007-251208 = 64591 ( 7.45% : x 13.42)
 0.878%: cd_diff 251007-251208 = 118040 ( 13.62% : x 7.34)
 0.953%: bitoffset 250706-250907 = 68910 ( 7.95% : x 12.58)
 0.953%: cd_diff 250706-250907 = 127824 ( 14.75% : x 6.78)
 1.047%: bitoffset 251208-251409 = 74198 ( 8.56% : x 11.68)
 1.047%: cd_diff 251208-251409 = 139920 ( 16.15% : x 6.19)
 1.091%: bitoffset 250104-250305 = 76692 ( 8.85% : x 11.30)
 1.091%: cd_diff 250104-250305 = 145472 ( 16.79% : x 5.96)
 1.095%: bitoffset 241259-241500 = 76985 ( 8.88% : x 11.26)
 1.095%: cd_diff 241259-241500 = 146218 ( 16.87% : x 5.93)
 1.100%: bitoffset 242203-250004 = 77195 ( 8.91% : x 11.23)
 1.100%: cd_diff 242203-250004 = 146632 ( 16.92% : x 5.91)
 1.145%: bitoffset 250004-250204 = 79730 ( 9.20% : x 10.87)
 1.145%: cd_diff 250004-250204 = 152456 ( 17.59% : x 5.68)
 1.161%: bitoffset 242303-250104 = 80590 ( 9.30% : x 10.75)
 1.161%: cd_diff 242303-250104 = 154660 ( 17.85% : x 5.60)
 1.170%: bitoffset 250505-250706 = 81123 ( 9.36% : x 10.68)
 1.170%: cd_diff 250505-250706 = 155502 ( 17.94% : x 5.57)
 1.183%: bitoffset 250204-250405 = 81879 ( 9.45% : x 10.58)
 1.183%: cd_diff 250204-250405 = 157620 ( 18.19% : x 5.50)
 1.207%: bitoffset 241400-241600 = 83161 ( 9.60% : x 10.42)
 1.207%: cd_diff 241400-241600 = 160450 ( 18.52% : x 5.40)
 1.222%: bitoffset 242102-242303 = 83951 ( 9.69% : x 10.32)
 1.222%: cd_diff 242102-242303 = 162136 ( 18.71% : x 5.34)
 1.248%: bitoffset 250405-250606 = 85428 ( 9.86% : x 10.14)
 1.248%: cd_diff 250405-250606 = 165678 ( 19.12% : x 5.23)
 1.276%: bitoffset 241500-241701 = 86888 ( 10.03% : x 9.97)
 1.276%: cd_diff 241500-241701 = 169070 ( 19.51% : x 5.13)
 1.283%: bitoffset 250907-251208 = 87321 ( 10.08% : x 9.92)
 1.283%: cd_diff 250907-251208 = 170104 ( 19.63% : x 5.09)
 1.303%: bitoffset 250305-250505 = 88362 ( 10.20% : x 9.81)
 1.303%: cd_diff 250305-250505 = 172720 ( 19.93% : x 5.02)
 1.359%: bitoffset 250706-251007 = 91336 ( 10.54% : x 9.49)
 1.359%: cd_diff 250706-251007 = 179720 ( 20.74% : x 4.82)
 1.367%: bitoffset 251007-251308 = 91773 ( 10.59% : x 9.44)
 1.367%: cd_diff 251007-251308 = 180532 ( 20.83% : x 4.80)
 1.404%: bitoffset 242002-242203 = 93746 ( 10.82% : x 9.24)
 1.404%: cd_diff 242002-242203 = 185562 ( 21.41% : x 4.67)
 1.521%: bitoffset 250606-250907 = 99827 ( 11.52% : x 8.68)
 1.521%: cd_diff 250606-250907 = 200032 ( 23.08% : x 4.33)
 1.670%: bitoffset 250004-250305 = 107405 ( 12.39% : x 8.07)
 1.670%: cd_diff 250004-250305 = 218232 ( 25.18% : x 3.97)
 1.677%: bitoffset 242203-250104 = 107785 ( 12.44% : x 8.04)
 1.677%: cd_diff 242203-250104 = 219404 ( 25.32% : x 3.95)
 1.699%: bitoffset 242303-250204 = 108859 ( 12.56% : x 7.96)
 1.699%: cd_diff 242303-250204 = 221910 ( 25.61% : x 3.91)
 1.710%: bitoffset 241259-241600 = 109437 ( 12.63% : x 7.92)
 1.710%: cd_diff 241259-241600 = 223366 ( 25.78% : x 3.88)
 1.719%: bitoffset 250104-250405 = 109879 ( 12.68% : x 7.89)
 1.719%: cd_diff 250104-250405 = 224488 ( 25.90% : x 3.86)
 1.766%: bitoffset 250907-251308 = 112219 ( 12.95% : x 7.72)
 1.766%: cd_diff 250907-251308 = 230116 ( 26.55% : x 3.77)
 1.776%: bitoffset 242102-250004 = 112775 ( 13.01% : x 7.68)
 1.776%: cd_diff 242102-250004 = 231346 ( 26.70% : x 3.75)
 1.802%: bitoffset 250405-250706 = 114068 ( 13.16% : x 7.60)
 1.802%: cd_diff 250405-250706 = 234416 ( 27.05% : x 3.70)
 1.821%: bitoffset 250204-250505 = 114996 ( 13.27% : x 7.54)
 1.821%: cd_diff 250204-250505 = 236962 ( 27.34% : x 3.66)
 1.832%: bitoffset 241400-241701 = 115578 ( 13.34% : x 7.50)
 1.832%: cd_diff 241400-241701 = 238262 ( 27.49% : x 3.64)
 1.888%: bitoffset 250305-250606 = 118339 ( 13.66% : x 7.32)
 1.888%: cd_diff 250305-250606 = 245220 ( 28.30% : x 3.53)
 1.902%: bitoffset 251007-251409 = 118974 ( 13.73% : x 7.28)
 1.902%: cd_diff 251007-251409 = 246524 ( 28.45% : x 3.52)
 1.917%: bitoffset 242002-242303 = 119776 ( 13.82% : x 7.24)
 1.917%: cd_diff 242002-242303 = 248556 ( 28.68% : x 3.49)
 1.918%: bitoffset 250606-251007 = 119760 ( 13.82% : x 7.24)
 1.918%: cd_diff 250606-251007 = 248868 ( 28.72% : x 3.48)
 2.073%: bitoffset 241701-242002 = 127369 ( 14.70% : x 6.80)
 2.073%: cd_diff 241701-242002 = 267840 ( 30.91% : x 3.24)
 2.094%: bitoffset 250505-250907 = 128389 ( 14.82% : x 6.75)
 2.094%: cd_diff 250505-250907 = 269760 ( 31.13% : x 3.21)
 2.204%: bitoffset 242203-250204 = 133646 ( 15.42% : x 6.48)
 2.204%: cd_diff 242203-250204 = 283052 ( 32.66% : x 3.06)
 2.208%: bitoffset 250706-251208 = 133857 ( 15.45% : x 6.47)
 2.208%: cd_diff 250706-251208 = 283512 ( 32.72% : x 3.06)
 2.216%: bitoffset 242303-250305 = 134212 ( 15.49% : x 6.46)
 2.216%: cd_diff 242303-250305 = 284294 ( 32.81% : x 3.05)
 2.287%: bitoffset 250004-250405 = 137613 ( 15.88% : x 6.30)
 2.287%: cd_diff 250004-250405 = 292752 ( 33.78% : x 2.96)
 2.298%: bitoffset 250907-251409 = 138127 ( 15.94% : x 6.27)
 2.298%: cd_diff 250907-251409 = 294038 ( 33.93% : x 2.95)
 2.325%: bitoffset 241259-241701 = 139431 ( 16.09% : x 6.22)
 2.325%: cd_diff 241259-241701 = 297220 ( 34.30% : x 2.92)
 2.343%: bitoffset 242102-250104 = 140229 ( 16.18% : x 6.18)
 2.343%: cd_diff 242102-250104 = 299396 ( 34.55% : x 2.89)
 2.345%: bitoffset 250104-250505 = 140325 ( 16.19% : x 6.18)
 2.345%: cd_diff 250104-250505 = 299652 ( 34.58% : x 2.89)
 2.397%: bitoffset 250204-250606 = 142757 ( 16.47% : x 6.07)
 2.397%: cd_diff 250204-250606 = 305746 ( 35.28% : x 2.83)
 2.436%: bitoffset 250305-250706 = 144590 ( 16.68% : x 5.99)
 2.436%: cd_diff 250305-250706 = 310188 ( 35.79% : x 2.79)
 2.458%: bitoffset 242002-250004 = 145618 ( 16.80% : x 5.95)
 2.458%: cd_diff 242002-250004 = 312902 ( 36.11% : x 2.77)
 2.485%: bitoffset 250505-251007 = 146873 ( 16.95% : x 5.90)
 2.485%: cd_diff 250505-251007 = 315828 ( 36.44% : x 2.74)
 2.682%: bitoffset 250706-251308 = 155780 ( 17.98% : x 5.56)
 2.682%: cd_diff 250706-251308 = 338844 ( 39.10% : x 2.56)
 2.689%: bitoffset 241600-242002 = 156093 ( 18.01% : x 5.55)
 2.689%: cd_diff 241600-242002 = 339712 ( 39.20% : x 2.55)
 2.701%: bitoffset 250405-250907 = 156656 ( 18.08% : x 5.53)
 2.701%: cd_diff 250405-250907 = 340858 ( 39.33% : x 2.54)
 2.714%: bitoffset 242203-250305 = 157218 ( 18.14% : x 5.51)
 2.714%: cd_diff 242203-250305 = 342454 ( 39.52% : x 2.53)
 2.753%: bitoffset 241701-242102 = 158985 ( 18.35% : x 5.45)
 2.753%: cd_diff 241701-242102 = 347574 ( 40.11% : x 2.49)
 2.755%: bitoffset 250606-251208 = 159027 ( 18.35% : x 5.45)
 2.755%: cd_diff 250606-251208 = 347202 ( 40.07% : x 2.50)
 2.824%: bitoffset 242303-250405 = 162094 ( 18.70% : x 5.35)
 2.824%: cd_diff 242303-250405 = 354966 ( 40.96% : x 2.44)
 2.861%: bitoffset 242102-250204 = 163751 ( 18.90% : x 5.29)
 2.861%: cd_diff 242102-250204 = 359070 ( 41.43% : x 2.41)
 2.897%: bitoffset 250004-250505 = 165308 ( 19.08% : x 5.24)
 2.897%: cd_diff 250004-250505 = 363120 ( 41.90% : x 2.39)
 2.915%: bitoffset 250104-250606 = 166146 ( 19.17% : x 5.22)
 2.915%: cd_diff 250104-250606 = 365272 ( 42.15% : x 2.37)
 2.937%: bitoffset 250204-250706 = 167087 ( 19.28% : x 5.19)
 2.937%: cd_diff 250204-250706 = 367582 ( 42.42% : x 2.36)
 3.017%: bitoffset 242002-250104 = 170545 ( 19.68% : x 5.08)
 3.017%: cd_diff 242002-250104 = 376704 ( 43.47% : x 2.30)
 3.083%: bitoffset 250405-251007 = 173384 ( 20.01% : x 5.00)
 3.083%: cd_diff 250405-251007 = 383942 ( 44.30% : x 2.26)
 3.202%: bitoffset 250706-251409 = 178466 ( 20.59% : x 4.86)
 3.202%: cd_diff 250706-251409 = 397268 ( 45.84% : x 2.18)
 3.220%: bitoffset 250606-251308 = 179217 ( 20.68% : x 4.84)
 3.220%: cd_diff 250606-251308 = 399350 ( 46.08% : x 2.17)
 3.285%: bitoffset 241500-242002 = 181957 ( 21.00% : x 4.76)
 3.285%: cd_diff 241500-242002 = 406834 ( 46.95% : x 2.13)
 3.308%: bitoffset 250505-251208 = 182929 ( 21.11% : x 4.74)
 3.308%: cd_diff 250505-251208 = 408662 ( 47.16% : x 2.12)
 3.315%: bitoffset 242203-250405 = 183271 ( 21.15% : x 4.73)
 3.315%: cd_diff 242203-250405 = 409862 ( 47.30% : x 2.11)
 3.317%: bitoffset 250305-250907 = 183328 ( 21.16% : x 4.73)
 3.317%: cd_diff 250305-250907 = 410086 ( 47.32% : x 2.11)
 3.358%: bitoffset 241600-242102 = 185079 ( 21.36% : x 4.68)
 3.358%: cd_diff 241600-242102 = 414806 ( 47.87% : x 2.09)
 3.361%: bitoffset 242102-250305 = 185203 ( 21.37% : x 4.68)
 3.361%: cd_diff 242102-250305 = 414610 ( 47.84% : x 2.09)
 3.399%: bitoffset 241701-242203 = 186786 ( 21.55% : x 4.64)
 3.399%: cd_diff 241701-242203 = 419628 ( 48.42% : x 2.07)
 3.424%: bitoffset 242303-250505 = 187833 ( 21.67% : x 4.61)
 3.424%: cd_diff 242303-250505 = 421448 ( 48.63% : x 2.06)
 3.448%: bitoffset 250104-250706 = 188847 ( 21.79% : x 4.59)
 3.448%: cd_diff 250104-250706 = 423982 ( 48.93% : x 2.04)
 3.460%: bitoffset 250004-250606 = 189349 ( 21.85% : x 4.58)
 3.460%: cd_diff 250004-250606 = 425306 ( 49.08% : x 2.04)
 3.529%: bitoffset 242002-250204 = 192171 ( 22.18% : x 4.51)
 3.529%: cd_diff 242002-250204 = 432886 ( 49.95% : x 2.00)
 3.693%: bitoffset 250305-251007 = 198913 ( 22.95% : x 4.36)
 3.693%: cd_diff 250305-251007 = 450692 ( 52.01% : x 1.92)
 3.727%: bitoffset 250606-251409 = 200264 ( 23.11% : x 4.33)
 3.727%: cd_diff 250606-251409 = 454062 ( 52.40% : x 1.91)
 3.766%: bitoffset 250505-251308 = 201855 ( 23.29% : x 4.29)
 3.766%: cd_diff 250505-251308 = 457916 ( 52.84% : x 1.89)
 3.805%: bitoffset 250204-250907 = 203488 ( 23.48% : x 4.26)
 3.805%: cd_diff 250204-250907 = 462566 ( 53.38% : x 1.87)
 3.818%: bitoffset 241400-242002 = 204003 ( 23.54% : x 4.25)
 3.818%: cd_diff 241400-242002 = 464238 ( 53.57% : x 1.87)
 3.886%: bitoffset 241701-242303 = 206737 ( 23.86% : x 4.19)
 3.886%: cd_diff 241701-242303 = 471368 ( 54.39% : x 1.84)
 3.890%: bitoffset 250405-251208 = 206923 ( 23.88% : x 4.19)
 3.890%: cd_diff 250405-251208 = 471178 ( 54.37% : x 1.84)
 3.908%: bitoffset 242203-250505 = 207640 ( 23.96% : x 4.17)
 3.908%: cd_diff 242203-250505 = 473238 ( 54.61% : x 1.83)
 3.949%: bitoffset 241500-242102 = 209251 ( 24.15% : x 4.14)
 3.949%: cd_diff 241500-242102 = 478258 ( 55.19% : x 1.81)
 3.955%: bitoffset 242102-250405 = 209531 ( 24.18% : x 4.14)
 3.955%: cd_diff 242102-250405 = 478170 ( 55.18% : x 1.81)
 3.982%: bitoffset 242303-250606 = 210630 ( 24.31% : x 4.11)
 3.982%: cd_diff 242303-250606 = 480908 ( 55.49% : x 1.80)
 3.983%: bitoffset 250004-250706 = 210651 ( 24.31% : x 4.11)
 3.983%: cd_diff 250004-250706 = 480708 ( 55.47% : x 1.80)
 3.995%: bitoffset 241600-242203 = 211135 ( 24.36% : x 4.10)
 3.995%: cd_diff 241600-242203 = 482756 ( 55.71% : x 1.80)
 4.021%: bitoffset 242002-250305 = 212217 ( 24.49% : x 4.08)
 4.021%: cd_diff 242002-250305 = 485230 ( 55.99% : x 1.79)
 4.177%: bitoffset 250204-251007 = 218374 ( 25.20% : x 3.97)
 4.177%: cd_diff 250204-251007 = 501250 ( 57.84% : x 1.73)
 4.269%: bitoffset 250505-251409 = 222067 ( 25.63% : x 3.90)
 4.269%: cd_diff 250505-251409 = 510296 ( 58.89% : x 1.70)
 4.295%: bitoffset 241259-242002 = 223039 ( 25.74% : x 3.89)
 4.295%: cd_diff 241259-242002 = 513772 ( 59.29% : x 1.69)
 4.307%: bitoffset 250104-250907 = 223548 ( 25.80% : x 3.88)
 4.307%: cd_diff 250104-250907 = 514524 ( 59.37% : x 1.68)
 4.344%: bitoffset 250405-251308 = 225019 ( 25.97% : x 3.85)
 4.344%: cd_diff 250405-251308 = 517978 ( 59.77% : x 1.67)
 4.408%: bitoffset 241701-250004 = 227481 ( 26.25% : x 3.81)
 4.408%: cd_diff 241701-250004 = 525382 ( 60.63% : x 1.65)
 4.460%: bitoffset 242203-250606 = 229516 ( 26.48% : x 3.78)
 4.460%: cd_diff 242203-250606 = 529882 ( 61.15% : x 1.64)
 4.476%: bitoffset 241400-242102 = 230126 ( 26.56% : x 3.77)
 4.476%: cd_diff 241400-242102 = 532330 ( 61.43% : x 1.63)
 4.478%: bitoffset 241600-242303 = 230223 ( 26.57% : x 3.76)
 4.478%: cd_diff 241600-242303 = 531720 ( 61.36% : x 1.63)
 4.492%: bitoffset 250305-251208 = 230760 ( 26.63% : x 3.76)
 4.492%: cd_diff 250305-251208 = 533180 ( 61.53% : x 1.63)
 4.498%: bitoffset 242303-250706 = 230990 ( 26.66% : x 3.75)
 4.498%: cd_diff 242303-250706 = 533518 ( 61.57% : x 1.62)
 4.540%: bitoffset 242102-250505 = 232615 ( 26.84% : x 3.73)
 4.540%: cd_diff 242102-250505 = 537886 ( 62.07% : x 1.61)
 4.581%: bitoffset 241500-242203 = 234189 ( 27.02% : x 3.70)
 4.581%: cd_diff 241500-242203 = 542806 ( 62.64% : x 1.60)
 4.612%: bitoffset 242002-250405 = 235398 ( 27.16% : x 3.68)
 4.612%: cd_diff 242002-250405 = 545464 ( 62.94% : x 1.59)
 4.673%: bitoffset 250104-251007 = 237745 ( 27.43% : x 3.65)
 4.673%: cd_diff 250104-251007 = 551358 ( 63.62% : x 1.57)
 4.834%: bitoffset 250004-250907 = 243818 ( 28.14% : x 3.55)
 4.834%: cd_diff 250004-250907 = 567234 ( 65.46% : x 1.53)
 4.843%: bitoffset 250405-251409 = 244147 ( 28.17% : x 3.55)
 4.843%: cd_diff 250405-251409 = 567754 ( 65.52% : x 1.53)
 4.942%: bitoffset 250305-251308 = 247843 ( 28.60% : x 3.50)
 4.942%: cd_diff 250305-251308 = 577622 ( 66.65% : x 1.50)
 4.946%: bitoffset 241259-242102 = 247982 ( 28.62% : x 3.49)
 4.946%: cd_diff 241259-242102 = 578780 ( 66.79% : x 1.50)
 4.949%: bitoffset 241701-250104 = 248094 ( 28.63% : x 3.49)
 4.949%: cd_diff 241701-250104 = 579068 ( 66.82% : x 1.50)
 4.963%: bitoffset 250204-251208 = 248640 ( 28.69% : x 3.49)
 4.963%: cd_diff 250204-251208 = 579902 ( 66.92% : x 1.49)
 4.969%: bitoffset 242203-250706 = 248839 ( 28.71% : x 3.48)
 4.969%: cd_diff 242203-250706 = 580082 ( 66.94% : x 1.49)
 4.992%: bitoffset 241600-250004 = 249714 ( 28.82% : x 3.47)
 4.992%: cd_diff 241600-250004 = 582698 ( 67.24% : x 1.49)
 5.059%: bitoffset 241500-242303 = 252180 ( 29.10% : x 3.44)
 5.059%: cd_diff 241500-242303 = 589176 ( 67.99% : x 1.47)
 5.083%: bitoffset 242102-250606 = 253053 ( 29.20% : x 3.42)
 5.083%: cd_diff 242102-250606 = 591150 ( 68.22% : x 1.47)
 5.100%: bitoffset 241400-242203 = 253680 ( 29.27% : x 3.42)
 5.100%: cd_diff 241400-242203 = 593616 ( 68.50% : x 1.46)
 5.191%: bitoffset 242002-250505 = 256957 ( 29.65% : x 3.37)
 5.191%: cd_diff 242002-250505 = 601818 ( 69.45% : x 1.44)
 5.198%: bitoffset 250004-251007 = 257223 ( 29.68% : x 3.37)
 5.198%: cd_diff 250004-251007 = 602270 ( 69.50% : x 1.44)
 5.338%: bitoffset 242303-250907 = 262342 ( 30.27% : x 3.30)
 5.338%: cd_diff 242303-250907 = 615778 ( 71.06% : x 1.41)
 5.404%: bitoffset 250204-251308 = 264750 ( 30.55% : x 3.27)
 5.404%: cd_diff 250204-251308 = 621960 ( 71.77% : x 1.39)
 5.435%: bitoffset 250305-251409 = 265823 ( 30.67% : x 3.26)
 5.435%: cd_diff 250305-251409 = 624800 ( 72.10% : x 1.39)
 5.442%: bitoffset 241701-250204 = 266087 ( 30.71% : x 3.26)
 5.442%: cd_diff 241701-250204 = 626088 ( 72.25% : x 1.38)
 5.446%: bitoffset 250104-251208 = 266203 ( 30.72% : x 3.26)
 5.446%: cd_diff 250104-251208 = 625848 ( 72.22% : x 1.38)
 5.522%: bitoffset 241600-250104 = 268922 ( 31.03% : x 3.22)
 5.522%: cd_diff 241600-250104 = 633148 ( 73.06% : x 1.37)
 5.560%: bitoffset 241259-242203 = 270267 ( 31.19% : x 3.21)
 5.560%: cd_diff 241259-242203 = 637172 ( 73.53% : x 1.36)
 5.567%: bitoffset 241500-250004 = 270506 ( 31.21% : x 3.20)
 5.567%: cd_diff 241500-250004 = 637370 ( 73.55% : x 1.36)
 5.572%: bitoffset 241400-242303 = 270717 ( 31.24% : x 3.20)
 5.572%: cd_diff 241400-242303 = 637784 ( 73.60% : x 1.36)
 5.585%: bitoffset 242102-250706 = 271147 ( 31.29% : x 3.20)
 5.585%: cd_diff 242102-250706 = 638436 ( 73.67% : x 1.36)
 5.697%: bitoffset 242303-251007 = 275064 ( 31.74% : x 3.15)
 5.697%: cd_diff 242303-251007 = 649158 ( 74.91% : x 1.33)
 5.729%: bitoffset 242002-250606 = 276183 ( 31.87% : x 3.14)
 5.729%: cd_diff 242002-250606 = 652018 ( 75.24% : x 1.33)
 5.796%: bitoffset 242203-250907 = 278576 ( 32.15% : x 3.11)
 5.796%: cd_diff 242203-250907 = 658286 ( 75.96% : x 1.32)
 5.881%: bitoffset 250104-251308 = 281497 ( 32.48% : x 3.08)
 5.881%: cd_diff 250104-251308 = 665894 ( 76.84% : x 1.30)
 5.894%: bitoffset 250204-251409 = 282003 ( 32.54% : x 3.07)
 5.894%: cd_diff 250204-251409 = 667194 ( 76.99% : x 1.30)
 5.914%: bitoffset 241701-250305 = 282661 ( 32.62% : x 3.07)
 5.914%: cd_diff 241701-250305 = 669676 ( 77.28% : x 1.29)
 5.961%: bitoffset 250004-251208 = 284245 ( 32.80% : x 3.05)
 5.961%: cd_diff 250004-251208 = 673052 ( 77.67% : x 1.29)
 6.010%: bitoffset 241600-250204 = 285947 ( 33.00% : x 3.03)
 6.010%: cd_diff 241600-250204 = 677688 ( 78.20% : x 1.28)
 6.029%: bitoffset 241259-242303 = 286620 ( 33.07% : x 3.02)
 6.029%: cd_diff 241259-242303 = 679498 ( 78.41% : x 1.28)
 6.074%: bitoffset 241400-250004 = 288149 ( 33.25% : x 3.01)
 6.074%: cd_diff 241400-250004 = 683664 ( 78.89% : x 1.27)
 6.090%: bitoffset 241500-250104 = 288675 ( 33.31% : x 3.00)
 6.090%: cd_diff 241500-250104 = 685122 ( 79.06% : x 1.26)
 6.151%: bitoffset 242203-251007 = 290709 ( 33.55% : x 2.98)
 6.151%: cd_diff 242203-251007 = 690266 ( 79.65% : x 1.26)
 6.225%: bitoffset 242002-250706 = 293212 ( 33.84% : x 2.96)
 6.225%: cd_diff 242002-250706 = 696580 ( 80.38% : x 1.24)
 6.363%: bitoffset 250104-251409 = 297968 ( 34.38% : x 2.91)
 6.363%: cd_diff 250104-251409 = 708990 ( 81.81% : x 1.22)
 6.392%: bitoffset 250004-251308 = 298943 ( 34.50% : x 2.90)
 6.392%: cd_diff 250004-251308 = 711360 ( 82.09% : x 1.22)
 6.396%: bitoffset 242102-250907 = 299026 ( 34.51% : x 2.90)
 6.396%: cd_diff 242102-250907 = 711714 ( 82.13% : x 1.22)
 6.456%: bitoffset 242303-251208 = 301073 ( 34.74% : x 2.88)
 6.456%: cd_diff 242303-251208 = 717048 ( 82.74% : x 1.21)
 6.476%: bitoffset 241600-250305 = 301701 ( 34.81% : x 2.87)
 6.476%: cd_diff 241600-250305 = 718948 ( 82.96% : x 1.21)
 6.480%: bitoffset 241701-250405 = 301831 ( 34.83% : x 2.87)
 6.480%: cd_diff 241701-250405 = 719796 ( 83.06% : x 1.20)
 6.527%: bitoffset 241259-250004 = 303405 ( 35.01% : x 2.86)
 6.527%: cd_diff 241259-250004 = 723402 ( 83.48% : x 1.20)
 6.574%: bitoffset 241500-250204 = 304941 ( 35.19% : x 2.84)
 6.574%: cd_diff 241500-250204 = 727374 ( 83.94% : x 1.19)
 6.592%: bitoffset 241400-250104 = 305543 ( 35.26% : x 2.84)
 6.592%: cd_diff 241400-250104 = 729028 ( 84.13% : x 1.19)
 6.748%: bitoffset 242102-251007 = 310668 ( 35.85% : x 2.79)
 6.748%: cd_diff 242102-251007 = 742060 ( 85.63% : x 1.17)
 6.871%: bitoffset 250004-251409 = 314748 ( 36.32% : x 2.75)
 6.871%: cd_diff 250004-251409 = 752394 ( 86.82% : x 1.15)
 6.883%: bitoffset 242303-251308 = 315139 ( 36.37% : x 2.75)
 6.883%: cd_diff 242303-251308 = 753584 ( 86.96% : x 1.15)
 6.902%: bitoffset 242203-251208 = 315765 ( 36.44% : x 2.74)
 6.902%: cd_diff 242203-251208 = 755234 ( 87.15% : x 1.15)
 7.023%: bitoffset 242002-250907 = 319651 ( 36.89% : x 2.71)
 7.023%: cd_diff 242002-250907 = 765326 ( 88.31% : x 1.13)
 7.034%: bitoffset 241500-250305 = 320030 ( 36.93% : x 2.71)
 7.034%: cd_diff 241500-250305 = 766436 ( 88.44% : x 1.13)
 7.036%: bitoffset 241600-250405 = 320132 ( 36.94% : x 2.71)
 7.036%: cd_diff 241600-250405 = 766648 ( 88.47% : x 1.13)
 7.037%: bitoffset 241701-250505 = 320172 ( 36.95% : x 2.71)
 7.037%: cd_diff 241701-250505 = 767000 ( 88.51% : x 1.13)
 7.039%: bitoffset 241259-250104 = 320209 ( 36.95% : x 2.71)
 7.039%: cd_diff 241259-250104 = 766734 ( 88.48% : x 1.13)
 7.070%: bitoffset 241400-250204 = 321194 ( 37.06% : x 2.70)
 7.070%: cd_diff 241400-250204 = 769360 ( 88.78% : x 1.13)
 7.325%: bitoffset 242203-251308 = 329368 ( 38.01% : x 2.63)
 7.325%: cd_diff 242203-251308 = 790230 ( 91.19% : x 1.10)
 7.357%: bitoffset 242303-251409 = 330379 ( 38.12% : x 2.62)
 7.357%: cd_diff 242303-251409 = 792686 ( 91.47% : x 1.09)
 7.371%: bitoffset 242002-251007 = 330777 ( 38.17% : x 2.62)
 7.371%: cd_diff 242002-251007 = 793952 ( 91.62% : x 1.09)
 7.490%: bitoffset 242102-251208 = 334616 ( 38.61% : x 2.59)
 7.490%: cd_diff 242102-251208 = 803342 ( 92.70% : x 1.08)
 7.512%: bitoffset 241259-250204 = 335254 ( 38.69% : x 2.58)
 7.512%: cd_diff 241259-250204 = 805402 ( 92.94% : x 1.08)
 7.525%: bitoffset 241400-250305 = 335689 ( 38.74% : x 2.58)
 7.525%: cd_diff 241400-250305 = 806580 ( 93.08% : x 1.07)
 7.560%: bitoffset 241701-250606 = 336821 ( 38.87% : x 2.57)
 7.560%: cd_diff 241701-250606 = 809332 ( 93.39% : x 1.07)
 7.587%: bitoffset 241500-250405 = 337647 ( 38.96% : x 2.57)
 7.587%: cd_diff 241500-250405 = 811392 ( 93.63% : x 1.07)
 7.590%: bitoffset 241600-250505 = 337756 ( 38.98% : x 2.57)
 7.590%: cd_diff 241600-250505 = 811566 ( 93.65% : x 1.07)
 7.792%: bitoffset 242203-251409 = 344211 ( 39.72% : x 2.52)
 7.792%: cd_diff 242203-251409 = 827502 ( 95.49% : x 1.05)
 7.908%: bitoffset 242102-251308 = 347834 ( 40.14% : x 2.49)
 7.908%: cd_diff 242102-251308 = 836476 ( 96.53% : x 1.04)
 7.964%: bitoffset 241259-250305 = 349508 ( 40.33% : x 2.48)
 7.964%: cd_diff 241259-250305 = 841190 ( 97.07% : x 1.03)
 8.034%: bitoffset 241701-250706 = 351705 ( 40.58% : x 2.46)
 8.034%: cd_diff 241701-250706 = 846680 ( 97.70% : x 1.02)
 8.073%: bitoffset 241400-250405 = 352872 ( 40.72% : x 2.46)
 8.073%: cd_diff 241400-250405 = 849630 ( 98.04% : x 1.02)
 8.102%: bitoffset 242002-251208 = 353816 ( 40.83% : x 2.45)
 8.102%: cd_diff 242002-251208 = 851752 ( 98.29% : x 1.02)
 8.103%: bitoffset 241600-250606 = 353790 ( 40.83% : x 2.45)
 8.103%: cd_diff 241600-250606 = 851314 ( 98.24% : x 1.02)
 8.135%: bitoffset 241500-250505 = 354798 ( 40.94% : x 2.44)
 8.135%: cd_diff 241500-250505 = 854164 ( 98.57% : x 1.01)
 8.368%: bitoffset 242102-251409 = 361988 ( 41.77% : x 2.39)
 8.368%: cd_diff 242102-251409 = 871636 (100.58% : x 0.99)
 8.508%: bitoffset 241259-250405 = 366224 ( 42.26% : x 2.37)
 8.508%: cd_diff 241259-250405 = 882524 (101.84% : x 0.98)
 8.517%: bitoffset 242002-251308 = 366587 ( 42.30% : x 2.36)
 8.517%: cd_diff 242002-251308 = 883148 (101.91% : x 0.98)
 8.571%: bitoffset 241600-250706 = 368216 ( 42.49% : x 2.35)
 8.571%: cd_diff 241600-250706 = 886674 (102.32% : x 0.98)
 8.618%: bitoffset 241400-250505 = 369625 ( 42.65% : x 2.34)
 8.618%: cd_diff 241400-250505 = 890466 (102.76% : x 0.97)
 8.643%: bitoffset 241500-250606 = 370417 ( 42.74% : x 2.34)
 8.643%: cd_diff 241500-250606 = 891926 (102.92% : x 0.97)
 8.798%: bitoffset 241701-250907 = 375159 ( 43.29% : x 2.31)
 8.798%: cd_diff 241701-250907 = 904068 (104.32% : x 0.96)
 8.977%: bitoffset 242002-251409 = 380425 ( 43.90% : x 2.28)
 8.977%: cd_diff 242002-251409 = 916548 (105.76% : x 0.95)
 9.046%: bitoffset 241259-250505 = 382443 ( 44.13% : x 2.27)
 9.046%: cd_diff 241259-250505 = 921560 (106.34% : x 0.94)
 9.107%: bitoffset 241500-250706 = 384189 ( 44.33% : x 2.26)
 9.107%: cd_diff 241500-250706 = 925670 (106.82% : x 0.94)
 9.115%: bitoffset 241400-250606 = 384389 ( 44.36% : x 2.25)
 9.115%: cd_diff 241400-250606 = 926260 (106.89% : x 0.94)
 9.134%: bitoffset 241701-251007 = 384927 ( 44.42% : x 2.25)
 9.134%: cd_diff 241701-251007 = 928032 (107.09% : x 0.93)
 9.326%: bitoffset 241600-250907 = 390432 ( 45.05% : x 2.22)
 9.326%: cd_diff 241600-250907 = 941118 (108.60% : x 0.92)
 9.541%: bitoffset 241259-250606 = 396568 ( 45.76% : x 2.19)
 9.541%: cd_diff 241259-250606 = 955886 (110.30% : x 0.91)
 9.576%: bitoffset 241400-250706 = 397568 ( 45.88% : x 2.18)
 9.576%: cd_diff 241400-250706 = 958440 (110.60% : x 0.90)
 9.658%: bitoffset 241600-251007 = 399921 ( 46.15% : x 2.17)
 9.658%: cd_diff 241600-251007 = 963868 (111.23% : x 0.90)
 9.847%: bitoffset 241701-251208 = 405354 ( 46.78% : x 2.14)
 9.847%: cd_diff 241701-251208 = 977354 (112.78% : x 0.89)
 9.850%: bitoffset 241500-250907 = 405444 ( 46.79% : x 2.14)
 9.850%: cd_diff 241500-250907 = 977176 (112.76% : x 0.89)
 9.996%: bitoffset 241259-250706 = 409609 ( 47.27% : x 2.12)
 9.996%: cd_diff 241259-250706 = 986718 (113.86% : x 0.88)
10.175%: bitoffset 241500-251007 = 414598 ( 47.84% : x 2.09)
10.175%: cd_diff 241500-251007 = 998550 (115.23% : x 0.87)
10.251%: bitoffset 241701-251308 = 416609 ( 48.07% : x 2.08)
10.251%: cd_diff 241701-251308 = 1004060 (115.86% : x 0.86)
10.308%: bitoffset 241400-250907 = 418099 ( 48.25% : x 2.07)
10.308%: cd_diff 241400-250907 = 1007328 (116.24% : x 0.86)
10.363%: bitoffset 241600-251208 = 419564 ( 48.42% : x 2.07)
10.363%: cd_diff 241600-251208 = 1010698 (116.63% : x 0.86)
10.629%: bitoffset 241400-251007 = 426545 ( 49.22% : x 2.03)
10.629%: cd_diff 241400-251007 = 1027696 (118.59% : x 0.84)
10.696%: bitoffset 241701-251409 = 428348 ( 49.43% : x 2.02)
10.696%: cd_diff 241701-251409 = 1032428 (119.14% : x 0.84)
10.724%: bitoffset 241259-250907 = 429034 ( 49.51% : x 2.02)
10.724%: cd_diff 241259-250907 = 1033732 (119.29% : x 0.84)
10.766%: bitoffset 241600-251308 = 430140 ( 49.64% : x 2.01)
10.766%: cd_diff 241600-251308 = 1036320 (119.59% : x 0.84)
10.872%: bitoffset 241500-251208 = 432962 ( 49.96% : x 2.00)
10.872%: cd_diff 241500-251208 = 1043034 (120.36% : x 0.83)
11.043%: bitoffset 241259-251007 = 437427 ( 50.48% : x 1.98)
11.043%: cd_diff 241259-251007 = 1053340 (121.55% : x 0.82)
11.208%: bitoffset 241600-251409 = 441785 ( 50.98% : x 1.96)
11.208%: cd_diff 241600-251409 = 1063458 (122.72% : x 0.81)
11.272%: bitoffset 241500-251308 = 443458 ( 51.17% : x 1.95)
11.272%: cd_diff 241500-251308 = 1067540 (123.19% : x 0.81)
11.321%: bitoffset 241400-251208 = 444749 ( 51.32% : x 1.95)
11.321%: cd_diff 241400-251208 = 1070338 (123.51% : x 0.81)
11.711%: bitoffset 241500-251409 = 454970 ( 52.50% : x 1.90)
11.711%: cd_diff 241500-251409 = 1093390 (126.17% : x 0.79)
11.718%: bitoffset 241400-251308 = 455129 ( 52.52% : x 1.90)
11.718%: cd_diff 241400-251308 = 1093862 (126.23% : x 0.79)
11.730%: bitoffset 241259-251208 = 455467 ( 52.56% : x 1.90)
11.730%: cd_diff 241259-251208 = 1094330 (126.28% : x 0.79)
12.123%: bitoffset 241259-251308 = 465129 ( 53.67% : x 1.86)
12.123%: cd_diff 241259-251308 = 1116934 (128.89% : x 0.78)
12.153%: bitoffset 241400-251409 = 465846 ( 53.76% : x 1.86)
12.153%: cd_diff 241400-251409 = 1118712 (129.09% : x 0.77)
12.557%: bitoffset 241259-251409 = 475473 ( 54.87% : x 1.82)
12.557%: cd_diff 241259-251409 = 1140982 (131.66% : x 0.76)

    [ Part 3: "Attached Text" ]

/* cc -O -o cd_diff cd_diff.c */

#include <sys/types.h>
#include <sys/times.h>
#include <sys/uio.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* A block of raw bytes held in memory together with its length. */
typedef struct {
    /* number of bytes in data */
    int size;
    /* the bytes themselves; readFile() allocates this with malloc() */
    unsigned char *data;
} DataBuffer;

/*
 * Small helper macros.  They are function-like macros, so an argument may
 * be evaluated more than once: do not pass expressions with side effects.
 * Every argument is parenthesized so that operators of low precedence
 * (|, &, ?:, ...) inside an argument cannot change the meaning.
 */

/* Smaller / larger of two values. */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

/*
 * n as an integer percentage of total, rounded to the nearest whole
 * percent (for non-negative n and positive total).  Half of the divisor
 * is added before dividing; adding a fixed 50 would only round correctly
 * when total is exactly 100.
 */
#define PERCENT(n,total) (((n)*100+(total)/2)/(total))

/*
 * 1 if bit number b of buf is set, otherwise 0.  buf is a struct with a
 * byte array member named data (such as DataBuffer); bit 0 is the least
 * significant bit of data[0], bit 8 the least significant bit of data[1].
 */
#define BTEST(b,buf) (((buf).data[(b)/8]&(1<<((b)&7)))!=0)

/*
 * Read everything between the current file offset of fd and the end of
 * the file into a freshly malloc()ed buffer.
 *
 * fd must be seekable, because the amount to read is determined with
 * lseek().  The file offset is left just after the last byte read.
 *
 * Returns a DataBuffer whose size is the number of bytes read and whose
 * data points to those bytes.  data is a valid pointer even when size is
 * 0 (nothing left to read).  This function never frees the buffer.
 *
 * Any failure (lseek or read error, file shrinking while being read, more
 * than INT_MAX bytes, out of memory) prints a message on stderr and
 * terminates the program with exit(1).
 */
DataBuffer readFile(int fd)
{
    DataBuffer buf;
    off_t startpos;
    off_t endpos;
    off_t remaining;
    int done = 0;

    startpos = lseek(fd, 0, SEEK_CUR);
    if (startpos == (off_t)-1) {
        perror("lseek");
        exit(1);
    }
    endpos = lseek(fd, 0, SEEK_END);
    if (endpos == (off_t)-1) {
        perror("lseek");
        exit(1);
    }

    /* An offset already past the end of the file means nothing to read */
    remaining = endpos > startpos ? endpos - startpos : 0;
    if (remaining > INT_MAX) {
        /* DataBuffer.size is an int and cannot describe this much data */
        fprintf(stderr, "File too large to load into memory\n");
        exit(1);
    }

    buf.size = (int)remaining;
    /* malloc(0) may legally return NULL, so always ask for at least 1 byte */
    buf.data = malloc(buf.size > 0 ? (size_t)buf.size : 1);
    if (buf.data == NULL) {
        perror("malloc");
        exit(1);
    }

    if (lseek(fd, startpos, SEEK_SET) == (off_t)-1) {
        perror("lseek");
        exit(1);
    }

    /* read() may return fewer bytes than requested; keep going until done */
    while (done < buf.size) {
        ssize_t n = read(fd, buf.data + done, (size_t)(buf.size - done));
        if (n < 0) {
            if (errno == EINTR)
                continue;
            perror("read");
            exit(1);
        }
        if (n == 0) {
            /* End of file before the expected size: errno is not set here */
            fprintf(stderr, "Unexpected read size: got %d of %d bytes\n",
                    done, buf.size);
            exit(1);
        }
        done += (int)n;
    }
    return buf;
}

/*
 * Lookup table: bits_in_byte[v] is the number of 1 bits in the byte
 * value v.  All entries are zero until init_bitcount() has been called.
 */
static int bits_in_byte[256];

/* Fill in bits_in_byte[] for every possible byte value 0..255. */
void init_bitcount()
{
    int value;

    for (value = 0; value < 256; value++) {
        int remaining = value;
        int ones = 0;

        /* Peel off the lowest bit until no set bits are left */
        while (remaining != 0) {
            ones += remaining & 1;
            remaining >>= 1;
        }
        bits_in_byte[value] = ones;
    }
}

/*
 * Total number of 1 bits in buf, summed byte by byte through the
 * bits_in_byte[] table.  init_bitcount() must have filled that table
 * first, otherwise every byte counts as zero bits.
 */
long bitcount(DataBuffer buf)
{
    long total = 0;
    int idx = 0;

    while (idx < buf.size) {
        total += bits_in_byte[buf.data[idx]];
        idx++;
    }
    return total;
}

/*
 * Number of runs of consecutive 1 bits in buf, scanning bits in BTEST
 * order.  A run is counted at the position where a 1 bit follows a 0 bit;
 * the position before the first bit is treated as a 0 bit.
 */
long bitblockcount(DataBuffer buf)
{
    long runs = 0;
    int prev = 0;
    int pos = 0;

    while (pos < buf.size * 8) {
        int cur = BTEST(pos,buf);

        /* a 0 -> 1 transition starts a new run */
        if (!prev && cur)
            runs++;
        prev = cur;
        pos++;
    }
    return runs;
}

/* Number of bytes in buf that have at least one bit set (are non-zero). */
long bytecount(DataBuffer buf)
{
    long nonzero = 0;
    int idx = 0;

    while (idx < buf.size) {
        if (buf.data[idx] != 0)
            nonzero++;
        idx++;
    }
    return nonzero;
}

/*
 * Histogram of the gaps between set bits in buf.
 *
 * For every 1 bit, in BTEST order:
 *   - if the previous bit was also 1, or this is the very first bit,
 *     entry 0 is incremented;
 *   - otherwise the entry indexed by the length of the run of 0 bits just
 *     before it is incremented, with lengths above 65535 all counted in
 *     entry 65535.
 *
 * Returns a pointer to a static array of 65536 counters, which is cleared
 * and refilled on every call.
 */
int *bitDistances(DataBuffer buf) {
    static int bitdist[65536];
    int total_bits = 8*buf.size;
    int prev = -1;      /* -1 = no bit seen yet; tests as "set" below */
    int run = 0;        /* length of the current run of equal bits */
    int pos;

    for (pos = 65535; pos >= 0; pos--)
        bitdist[pos] = 0;

    for (pos = 0; pos < total_bits; pos++) {
        int cur = BTEST(pos,buf);

        if (cur) {
            if (prev)
                bitdist[0] += 1;
            else
                bitdist[MIN(run,65535)] += 1;
        }

        if (cur == prev) {
            run++;
        } else {
            prev = cur;
            run = 1;
        }
    }
    return bitdist;
}

/*
 * Sift-down for an indirect min-heap.
 *
 * p[1..N] holds indices into a[]; the heap is ordered by a[p[.]] with the
 * smallest key at p[1].  The entry at position k is moved down until
 * neither of its children has a smaller key.
 */
void pqdownheapR(int *a, int *p, int N, int k) {
    const int moving = p[k];

    for (;;) {
        int child;

        if (k > N/2)
            break;                      /* k has no children */
        child = 2*k;
        /* pick the child with the smaller key */
        if (child < N && a[p[child+1]] < a[p[child]])
            child += 1;
        if (a[p[child]] >= a[moving])
            break;                      /* heap order holds here */
        p[k] = p[child];
        k = child;
    }
    p[k] = moving;
}

/*
 * Result of huffmanCode().
 * This assumes integer codes 0->65535, as above.
 */
typedef struct HuffmanCode {
    int *bits;  /* bits[k]: code word assigned to symbol k */
    int *len;   /* len[k]: length in bits of the code word for symbol k */
    int size;   /* number of symbols that actually received a code */
} HuffmanCode;

HuffmanCode huffmanCode(int *data) {
    static int heap[65536];
    int N=0;
    int codes;
    static int code[65536];
    static int len[65536];
    static int count[65536*2];
    static int dad[65536*2];
    int i,k;
    HuffmanCode result;

    memset(dad,0,sizeof(dad));
    memset(count,0,sizeof(count));
    for(i=0;i<65536;i++)
        count[i]=data[i];

    /* Build index heap */
    for(i=0;i<65536;i++) if(data[i]) heap[++N]=i;
    for(i=N;i>0;i--) pqdownheapR(count,heap,N,i);

    /* Remember the size */
    codes=N;
    result.size = codes;

    /* Build a binary tree */
    while (N > 1) {
        int t = heap[1];
        heap[1] = heap[N--];
        pqdownheapR(count,heap,N,1);
        count[65536+N] = count[heap[1]]+count[t];
        dad[t] = 65536+N;
        dad[heap[1]] = -65536-N;
        heap[1] = 65536+N;
        pqdownheapR(count,heap,N,1);
    }
    dad[65536+N]=0;

    /* Make huffman code from the binary tree */
    for (k = 0; k<65536; k++) {
        if (data[k]) {
            int i = 0, j = 1, t = dad[k], x=0;
            while (t) {
                if (t<0) { x += j; t = -t; }
                t = dad[t]; j += j; i++;
            }
            code[k]=x; len[k]=i;
        } else {
            code[k] = len[k] = 0;
        }
    }

    result.bits = code;
    result.len = len;

    return result;
}

/*
 * Estimate the encoded size in bytes of buf under the code huff.
 *
 * Each set bit costs the code length of its distance symbol (same symbol
 * definition as bitDistances: 0 after a set bit or at position 0,
 * otherwise the number of clear bits before it, capped at 65535).  The
 * bit total is divided by 8 (rounding down) and 2 bytes per code in the
 * table are added.
 */
int huffmanSize(DataBuffer buf, HuffmanCode huff) {
    const int nbits = 8*buf.size;
    int total_bits = 0;
    int prev = -1;      /* value of the previous bit; -1 before the first */
    int run = 0;        /* length of the current run of equal bits */
    int pos;

    for (pos = 0; pos < nbits; pos++) {
        const int cur = BTEST(pos,buf);
        if (cur) {
            if (prev)
                total_bits += huff.len[0];
            else
                total_bits += huff.len[MIN(run,65535)];
        }
        if (cur == prev) {
            run++;
        } else {
            prev = cur;
            run = 1;
        }
    }
    return total_bits / 8 + huff.size*2;
}

/*
 * Compare the two digests readable from fd1 (old) and fd2 (new) and print
 * the estimated size of a Huffman coded bit-distance diff between them.
 *
 * Both files are read from their current offset to the end and must have
 * the same length; otherwise an error is printed and the process exits
 * with status 1.  Output goes to stdout as one "sizes: ..." line.
 */
void diff(int fd1, int fd2) {
    DataBuffer buf1, buf2;
    DataBuffer xored;
    int i;
    int *distances;
    HuffmanCode huffman;
    int size;

    init_bitcount();

    buf1 = readFile(fd1);
    buf2 = readFile(fd2);
    /* Checked explicitly rather than with assert(): with NDEBUG the XOR
     * loop below would otherwise read past the shorter buffer. */
    if (buf1.size != buf2.size) {
        fprintf(stderr, "digest sizes differ: %d vs %d bytes\n",
                buf1.size, buf2.size);
        exit(1);
    }

    /* Build a XOR of the two digests: set bits mark changed positions */
    xored.size = buf1.size;
    xored.data = malloc(xored.size > 0 ? (size_t)xored.size : 1);
    if (!xored.data) {
        perror("malloc");
        exit(1);
    }
    for (i = 0; i < buf1.size; i++) {
        xored.data[i] = buf1.data[i] ^ buf2.data[i];
    }

    distances = bitDistances(xored);
    huffman = huffmanCode(distances);
    size = huffmanSize(xored, huffman);
    printf("sizes: old: %8d, new: %8d, diff: %6d (%6.2f%% : x%6.2f)\n",
            buf1.size, buf2.size, size, 100.*size/buf1.size,
            buf1.size/(double)size);

    free(xored.data);
    free(buf2.data);
    free(buf1.data);
}

/*
 * Open fname read-only and return the descriptor.  If the open fails the
 * error is reported with perror() and the process exits with errno as
 * its status.
 */
static
int Open(const char *fname) {
        int fd = open(fname, O_RDONLY);
        if (fd >= 0)
                return fd;
        perror(fname);
        exit(errno);
}

/*
 * Advance fd past an HTTP header: bytes are consumed one at a time until
 * two newlines have been seen with nothing but carriage returns between
 * them, or until read() stops delivering data.
 */
void skipHttpHeader(int fd)
{
    char ch;
    char newlines = 0;

    while (newlines < 2) {
        if (read(fd, &ch, 1) <= 0)
            return;
        if (ch == '\n')
            newlines += 1;
        else if (ch != '\r')
            newlines = 0;       /* any other byte starts a new line */
    }
}

#if TEST_HUFFMAN
/*
 * Render the code word of symbol k as a string of '0'/'1' characters,
 * lowest code bit first.  The string lives in static storage and is
 * overwritten by the next call.
 */
char *bits(HuffmanCode huffman, int k)
{
    static char res[256];
    const int nbits = huffman.len[k];
    char *out = res;
    int pos;

    for (pos = 0; pos < nbits; pos++)
        *out++ = (huffman.bits[k] & (1 << pos)) ? '1' : '0';
    *out = '\0';
    return res;
}

/*
 * Huffman self test: build a code from the byte frequencies of argv[1]
 * and print, for every byte value that occurs, its hex value, character,
 * count, code length and code bits.
 */
int main(int argc, char *argv[]) {
    int count[65536];
    int i;
    int textlen;
    HuffmanCode huffman;

    /* argv[1] is NULL when no argument is given; do not dereference it */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <string>\n", argv[0]);
        return -1;
    }

    memset(count,0,sizeof(count));
    textlen = (int)strlen(argv[1]);
    for(i=0; i<textlen; i++)
        count[(unsigned char)argv[1][i]]++;

    huffman = huffmanCode(count);
    for(i=0;i<65536;i++)
        if (count[i])
            printf("%2x/%c: %4d %d %s\n", i, i, count[i], huffman.len[i], bits(huffman,i));
    return 0;
}
    
#else
/*
 * Usage: <program> <digest_old> <digest_new>
 *
 * Opens both files, skips the HTTP header of each and prints the diff
 * size estimate.  Returns -1 on a usage error, 0 otherwise.
 */
int main(int argc, char *argv[]) {
        int fds[2];
        int n;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <digest_old> <digest_new>\n", argv[0]);
                return -1;
        }

        /* open and position each input in turn, old digest first */
        for (n = 0; n < 2; n++) {
                fds[n] = Open(argv[n + 1]);
                skipHttpHeader(fds[n]);
        }

        diff(fds[0], fds[1]);

        for (n = 0; n < 2; n++)
                close(fds[n]);

        return 0;
}
#endif
Received on Tue Jul 29 2003 - 13:15:59 MDT

This archive was generated by hypermail pre-2.1.9 : Tue Dec 09 2003 - 16:12:16 MST