Changeset 262

Show
Ignore:
Timestamp:
08/16/2006 12:04:55 AM
Author:
shiva
Message:

From ssphys-trust-encoding branch: Eliminate XML character-set
sanitizing and trust that the VSS character set is in the declared
encoding.
From Unicode branch: Declare UTF-8 encoding around sqlite2 DBI calls
to prevent double-encoding. Output UTF-8 to DataCache.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/script/Vss2Svn/DataCache.pm

    r250 r262  
    4545        return undef; 
    4646    } 
     47 
     48    # we'll be sending UTF-8 to this handle 
     49    binmode $self->{fh}, ":utf8"; 
    4750 
    4851    return $self; 
  • trunk/script/Vss2Svn/Dumpfile.pm

    r248 r262  
    33use Vss2Svn::Dumpfile::Node; 
    44use Vss2Svn::Dumpfile::SanityChecker; 
    5 use Encode qw(from_to)
     5require Encode
    66 
    77use warnings; 
     
    8787    $author = '' if !defined($author); 
    8888 
    89     # convert to utf8 
    90     from_to ($comment, "windows-1252", "utf8"); 
    91     from_to ($author, "windows-1252", "utf8"); 
     89    $comment = Encode::decode_utf8( $comment ); 
     90    $author = Encode::decode_utf8( $author ); 
    9291 
    9392    if ($revision > 0) { 
     
    704703 
    705704    my $string = $node->get_headers(); 
    706     from_to ($string, "windows-1252", "utf8"); 
    707705    print $fh $string; 
    708706    $self->output_content($node->{hideprops}? undef : $node->{props}, 
  • trunk/script/vss2svn.pl

    r261 r262  
    1919use Vss2Svn::SvnRevHandler; 
    2020use Vss2Svn::Dumpfile; 
     21 
     22require Encode; 
    2123 
    2224our(%gCfg, %gSth, %gErr, %gFh, $gSysOut, %gActionType, %gNameLookup, %gId); 
     
    471473 
    472474    while(defined($row = $sth->fetchrow_hashref() )) { 
    473         $gNameLookup{ $row->{offset} } = $row->{name}
     475        $gNameLookup{ $row->{offset} } = Encode::decode_utf8( $row->{name} )
    474476    } 
    475477}  #  End LoadNameLookup 
     
    712714                        . "'$row->{itemtype}' unexpected"); 
    713715        } 
     716 
     717        $row->{itemname} = Encode::decode_utf8( $row->{itemname} ); 
    714718 
    715719        # The handler's job is to keep track of physical-to-real name mappings 
  • trunk/ssphys/SSPhys/Formatter.cpp

    r196 r262  
    5454    : m_pCurrentFileNode (NULL) 
    5555  { 
     56    // patch this line to match your VSS DB's locale 
    5657    TiXmlDeclaration decl ("1.0", "windows-1252", ""); 
    57     if (NULL == setlocale (LC_ALL, ".1252")) 
    58       std::cerr << "WARNING: unable to correctly set the windows-1252 locale" << std::endl; 
    5958    m_Document.InsertEndChild (decl); 
    6059  } 
  • trunk/ssphys/SSPhysLib/XML.cpp

    r157 r262  
    55#include "StdAfx.h" 
    66#include "XML.h" 
    7  
    8 class CValidXMLChar 
    9 { 
    10 public: 
    11   bool operator () (char c) 
    12   { 
    13     unsigned char uc = static_cast <unsigned char> (c); 
    14     bool b = true; 
    15     if ( (uc < 0x20 && uc != 0x09 && uc != 0x0A && uc != 0x0D) 
    16       || (uc >= 0x7f && uc <= 0x84) 
    17       || (uc >= 0x86 && uc <= 0x9f) ) 
    18     { 
    19       m_bInvalidOccured = true; 
    20       return true; 
    21     } 
    22     return false; 
    23   } 
    24   bool m_bInvalidOccured; 
    25 }; 
    26  
    27 std::string sanitizeForXML (const std::string& input) 
    28 { 
    29   std::string output (input); 
    30   CValidXMLChar validXMLChar; 
    31   std::replace_if (output.begin (), output.end (), validXMLChar, '_'); 
    32   return output; 
    33 } 
    34  
    35 // --------------------------------------------------------------- 
    367 
    378XMLNode::XMLNode (XMLNode* pParent, std::string name, AttribMap attrib) 
     
    4617  for (; itor != attrib.end (); ++itor) 
    4718  { 
    48     m_Node.SetAttribute(itor->first, sanitizeForXML (itor->second)); 
     19    m_Node.SetAttribute(itor->first, itor->second); 
    4920  } 
    5021} 
     
    6233void XMLNode::SetText (std::string text) 
    6334{ 
    64   TiXmlText xmlText (sanitizeForXML (text)); 
     35  TiXmlText xmlText (text); 
    6536  m_Node.InsertEndChild(xmlText); 
    6637} 
     
    8253void XMLText::SetValue (std::string value) 
    8354{ 
    84   m_Text.SetValue (sanitizeForXML (value)); 
     55  m_Text.SetValue (value); 
    8556} 
    86  
    87  
  • trunk/ssphys/utils/tinyxml.cpp

    r195 r262  
    128128                        ++i; 
    129129                } 
    130                 else if (isprint (c)) 
     130                else 
    131131                { 
     132                        // just pass these through, since we've 
     133                        // declared an encoding that presumably allows 
     134                        // them 
     135                        *outString += (char) c; // somewhat more efficient function call. 
    132136                        //char realc = (char) c; 
    133137                        //outString->append( &realc, 1 ); 
    134                         *outString += (char) c; // somewhat more efficient function call. 
    135                         ++i; 
    136                 } 
    137                 else 
    138                 { 
    139                         // in any other case, we drop this character 
    140138                        ++i; 
    141139                } 

These ads are automatically generated by Google based on the content of this page. Revenue from these ads helps to pay for hosting fees of this site; however, these ads do not constitute an endorsement by PumaCode.org.