<?php

# Debug aid: when the request carries a "src" query parameter, print this
# script's own highlighted source and stop before doing any cleaning.
if (isset($_GET['src'])) {
    highlight_file(__FILE__);
    exit;
}

/*
 
  CHANGELOG
  
    2005-03-03 Fixed "valign" property converted to plain "v"
    2005-03-03 First public preview ;)
    2004-12-02 Initial version.
 
  TODO:
   *) Tables cleaning support
   *) Images support
   *) What to do with localsrc images?
   *) Handle those: <p><br/><br/></p>
   *) v:* properties (<img v:shapes=...>)
   *) Balance tags (<strong><em></strong></em> -> <strong><em></em><em><strong></strong></em>)
   *) Other checks against proper XML

  (c) 2004 - 2005 Kaspars Foigts (http://laacz.lv/)
  
  Contact e-mail - l.a.a.c.z.[.@.].l.a.a.c.z...l.v.)
  
  SOURCE CODE USAGE (or should I call it LICENCE?)
  
  This code may be used, modified and distributed with no restrictions, as long as copyright
  is preserved. However, if any modifications are made, you should notify me about changes.
  
  Any bugs, feature suggestions and everything else you would like to inform me about, should
  be sent to me as well.
  
  No extensive testing was ever performed. Use it at your own risk.
  
*/


# Choose the sample document: ?i=N selects wordN.txt; when that file does not
# exist (or is not a regular file) the first sample, word1.txt, is used.
$i = isset($_GET['i']) ? (int)$_GET['i'] : 1;
if (!is_file('word' . $i . '.txt')) {
    $i = 1;
}
# Send the header before touching the file, so that a warning printed during
# the read cannot cause a "headers already sent" failure.
header('content-type: text/plain; charset=utf-8');
$data = is_readable('word' . $i . '.txt') ? file_get_contents('word' . $i . '.txt') : false;
if ($data === false) {
    # Even the fallback sample is unavailable: signal it with a 404 status and
    # continue with an empty document instead of passing false further on.
    http_response_code(404);
    $data = '';
}

# Applies regular expression $re with replacement $repl to string $data
# (passed by reference) over and over, until a pass finds nothing to replace.
#
# Declared at file level (not inside wordcleaner()) so that wordcleaner() can
# be called more than once without a "Cannot redeclare applyre()" fatal error.
#
#   $re   - PCRE pattern, including delimiters and modifiers.
#   $repl - replacement string (may use \1, \2, ... backreferences).
#   $data - subject string; updated in place.
function applyre($re, $repl, &$data) {
    do {
        $result = preg_replace($re, $repl, $data, -1, $count);
        if ($result === null) {
            # PCRE error (e.g. backtrack limit hit): keep the last good text
            # rather than overwriting $data with null.
            return;
        }
        # A pass that matched but produced identical text would repeat
        # forever, so treat "nothing changed" as done.
        $changed = ($result !== $data);
        $data = $result;
    } while ($count > 0 && $changed);
}

# Cleans up Word's HTML mess.
#
#   $data - Word's HTML to clean (treated as a string).
#
# Returns the cleaned up HTML. The rules below run in order; each one is
# re-applied until it no longer matches before the next one starts.
function wordcleaner($data) {
    # Non-string input (false from a failed read, null) is cleaned as text.
    $data = (string)$data;

    # Remove unneeded style="" components (sizes)
    applyre('/<([^>]+)style="([^"]*)(font-size|text-indent|padding|padding-top|padding-right|padding-bottom|padding-left|margin|margin-top|margin-right|margin-left|margin-bottom|width|height):(\s*[\-0-9\.]+(cm|pt|em|px|%)\s*)+(;)?\s*([^"]*)"([^>]*)>/', '<\1style="\2\7"\8>', $data);
    # Repair (by removing, of course:) font-family doublequoted
    applyre('/<([^>]+style="[^>]*)\s*font-family:\s*"[^"]+";\s*([^>]*)>/', '<\1\2>', $data);
    # Remove unneeded style="" components (colors)
    applyre('/<([^>]+)style="([^"]*)\s*(background-color|color):\s*[#,\s0-9\.\(\)rgb]+\s*(;)?\s*([^"]*)"([^>]*)>/', '<\1style="\2\5"\6>', $data);
    # Remove unneeded style="" components (fonts)
    applyre('/<([^>]+style="[^"]*)\s*font-family:[^;"]+;?([^"]*)"([^>]*)>/', '<\1\2"\3>', $data);
    # Remove unneeded style="" components (backgrounds)
    applyre('/<([^>]+style="[^"]*)\s*background:[^;"]+[;"]\s*([^>]*)>/', '<\1\2>', $data);
    # Remove unneeded style="" components (-moz-*) (occurs when pasting from moz to word)
    applyre('/<([^>]+style="[^"]*)\s*\-moz\-[a-z0-9\-]+:[^";]+[;"]\s*([^>]*)>/', '<\1\2>', $data);

    # Remove proprietary comments
    applyre('/<!--[^>]+>/U', '', $data);
    applyre('/<![^>]+-->/U', '', $data);
    # Remove classnames, type="" (for lists)
    applyre('/(\s*(type|class)="[^"]+")/', '', $data);
    # Remove lang's
    applyre('/(\s*lang="[^"]+")/', '', $data);
    # Remove empty styles
    applyre('/(\s*style="")/', '', $data);
    # Remove <(o|v|w|...):...> tags
    applyre('/(<\/?.:.*>)/sU', '', $data);
    # Remove span's without any style information attached
    applyre('/<span\s*>(.*)<\/span>/sU', '\1', $data);
    # Convert b(olds) to strongs
    applyre('/<(\/)?b>/U', '<\1strong>', $data);
    # Convert i(talics) to emphasis; \1 carries the "/" of a closing tag
    applyre('/<(\/)?i>/U', '<\1em>', $data);
    # Remove align="..." stuff
    applyre('/<([^>]+)\s*[^v]*align=(")?[^"]+\2([^>]*)?>/U', '<\1\3>', $data);
    # Remove v:*="..." stuff
    applyre('/<([^>]+)\s*v:[a-z]+=(")?[^"]+\2([^>]*)?>/U', '<\1\3>', $data);
    # Remove all empty elements. The part after the tag name may only be
    # attributes of that same tag ([^>]*), so a match can never stretch over
    # other tags and swallow real text.
    applyre('/<([a-z0-9]+)(?:\s[^>]*)?>\s*<\/\1>/U', '', $data);
    # Remove elements, which close themselves and then open again </code><code>
    applyre('/<\/([a-z]+)><\1>/U', '', $data);
    return $data;
}
# Output the cleaned-up version of the loaded sample document.
print wordcleaner($data);

?>