Commit 7fbbe778 authored by Friedemann Kleint
Browse files

Debugger[CDB]: Rework string formatting.

Use watch encoding without quotes for non-ASCII output.
Use formats with quotes for recoded strings.
Extend char-pointer recoding to work for char-arrays as well
and restrict to char types. Split up and re-use when
outputting QByteArrays or std::[w]string.

Task-number: QTCREATORBUG-5667
Change-Id: If9748c2e375ee5ecb05d257410eba4012de1c3cf
Reviewed-on: http://codereview.qt.nokia.com/2747


Reviewed-by: Friedemann Kleint <Friedemann.Kleint@nokia.com>
parent 815d9cb1
......@@ -206,6 +206,31 @@ inline char toHexDigit(unsigned v)
return char(v - 10) + 'a';
}
// Strings from raw data.
// Build a double-quoted wide string from 'size' bytes of 8-bit character
// data. Each byte is widened to wchar_t as-is (no code page conversion).
// An input of size 0 yields just the two quote characters.
std::wstring quotedWStringFromCharData(const unsigned char *data, size_t size)
{
    const wchar_t quote = L'"';
    std::wstring quoted;
    quoted.reserve(size + 2); // payload plus opening and closing quote
    quoted += quote;
    for (size_t i = 0; i < size; ++i)
        quoted += static_cast<wchar_t>(data[i]);
    quoted += quote;
    return quoted;
}
// Build a double-quoted wide string from raw memory holding wchar_t data.
// 'sizeIn' is the size of the memory area in bytes; it is converted to a
// character count by integer division, so a trailing partial character
// is dropped. The data is interpreted in place as wchar_t.
std::wstring quotedWStringFromWCharData(const unsigned char *dataIn, size_t sizeIn)
{
    const size_t charCount = sizeIn / sizeof(wchar_t);
    const wchar_t *firstChar = reinterpret_cast<const wchar_t *>(dataIn);
    std::wstring quoted;
    quoted.reserve(charCount + 2); // payload plus opening and closing quote
    quoted += L'"';
    quoted.append(firstChar, charCount);
    quoted += L'"';
    return quoted;
}
// String from hex "414A" -> "AJ".
std::string stringFromHex(const char *p, const char *end)
{
......
......@@ -178,6 +178,10 @@ std::string wStringToGdbmiString(const std::wstring &w);
std::string wStringToString(const std::wstring &w);
std::wstring stringToWString(const std::string &w);
// Strings from raw data.
std::wstring quotedWStringFromCharData(const unsigned char *data, size_t size);
std::wstring quotedWStringFromWCharData(const unsigned char *data, size_t size);
// String from hex "414A" -> "AJ".
std::string stringFromHex(const char *begin, const char *end);
// Decode hex to a memory area.
......
......@@ -73,7 +73,7 @@ static inline void debugNodeFlags(std::ostream &str, unsigned f)
if (f & SymbolGroupNode::ComplexDumperOk)
str << " ComplexDumperOk";
if (f & SymbolGroupNode::WatchNode)
str << " WatchNode";
QTCREATORBUG-5667 str << " WatchNode";
str << ' ';
}
......@@ -346,110 +346,160 @@ enum PointerFormats // Watch data pointer format requests
enum DumpEncoding // WatchData encoding of GDBMI values
{
DumpEncodingAscii = 0,
DumpEncodingBase64 = 1,
DumpEncodingBase64_Utf16 = 2,
DumpEncodingBase64_Ucs4 = 3,
DumpEncodingHex_Latin1 = 6,
DumpEncodingHex_Utf16 = 7,
DumpEncodingHex_Ucs4_LittleEndian = 8,
DumpEncodingHex_Utf8_LittleEndian = 9,
DumpEncodingHex_Ucs4_BigEndian = 10,
DumpEncodingHex_Utf16_BigEndian = 11,
DumpEncodingHex_Utf16_LittleEndian = 12
DumpEncodingBase64_Utf16_WithQuotes = 2,
DumpEncodingHex_Ucs4_LittleEndian_WithQuotes = 3,
DumpEncodingBase64_Utf16 = 4,
DumpEncodingHex_Latin1_WithQuotes = 6,
DumpEncodingHex_Utf8_LittleEndian_WithQuotes = 9
};
/* Recode arrays/pointers of char*, wchar_t according to users
* sepcification. Handles char formats for 'char *', '0x834478 "hallo.."'
* and 'wchar_t *', '0x834478 "hallo.."'.
* This is done by retrieving the address and the length (in characters)
* specification. Handles char formats for 'char *', '0x834478 "hallo.."'
* and 'wchar_t *', '0x834478 "hallo.."', 'wchar_t[56] "hallo"', etc.
* This is done by retrieving the address (from the pointer value for
* pointers, using passed in-address for arrays) and the length
* (in characters excluding \0)
* of the CDB output, converting it to memory size, fetching the data
* from memory, zero-terminating and recoding it using the encoding
* from memory, and recoding it using the encoding
* defined in watchutils.cpp.
* As a special case, if there is no user-defined format and the
* CDB output contains '?' indicating non-printable characters,
* switch to latin1 (8bit) such that the watchmodel formatting options trigger. */
bool DumpParameters::recode(const std::string &type,
* CDB output contains '?'/'.' (CDB version?) indicating non-printable
* characters, switch to a suitable type such that the watchmodel
* formatting options trigger.
* This is split into a check step that returns a struct containing
* an allocated buffer with the raw data/size and recommended format
* (or 0 if recoding is not applicable) and the actual recoding step.
* Step 1) is used by std::string dumpers to potentially reformat
* the arrays. */
DumpParameterRecodeResult
DumpParameters::checkRecode(const std::string &type,
const std::string &iname,
const std::wstring &value,
const SymbolGroupValueContext &ctx,
std::wstring *value, int *encoding) const
ULONG64 address,
const DumpParameters *dp /* =0 */)
{
enum ReformatType { ReformatNone, ReformatPointer, ReformatArray };
DumpParameterRecodeResult result;
// We basically handle char formats for 'char *', '0x834478 "hallo.."'
// and 'wchar_t *', '0x834478 "hallo.."'
// Determine address and length from the pointer value output,
// read the raw memory and recode if that is possible.
if (type.empty() || type.at(type.size() - 1) != '*')
return false;
int newFormat = format(type, iname);
if (value->compare(0, 2, L"0x"))
return false;
const std::wstring::size_type quote1 = value->find(L'"', 2);
if (quote1 == std::wstring::npos)
return false;
// The user did not specify any format, still, there are '?'
// (indicating non-printable) in what the debugger prints. In that case,
// append a hex dump to the normal output. If there are no '?'-> all happy.
if (newFormat < FormatLatin1String) {
const bool hasNonPrintable = value->find(L'?', quote1 + 1) != std::wstring::npos;
if (type.empty() || value.empty())
return result;
const std::wstring::size_type quote2 = value.size() - 1;
if (value.at(quote2) != L'"')
return result;
ReformatType reformatType = ReformatNone;
switch (type.at(type.size() - 1)) {
case '*':
reformatType = ReformatPointer;
if (value.compare(0, 2, L"0x"))
return result;
break;
case ']':
reformatType = ReformatArray;
break;
default:
return result;
}
// Check for a reformattable type (do not trigger for a 'std::string *').
if (type.compare(0, 4, "char") == 0
|| type.compare(0, 13, "unsigned char") == 0) {
} else if (type.compare(0, 7, "wchar_t", 0, 7) == 0
|| type.compare(0, 14, "unsigned short") == 0) {
result.isWide = true;
} else {
return result;
}
// Empty string?
const std::wstring::size_type quote1 = value.find(L'"', 2);
if (quote1 == std::wstring::npos || quote2 == quote1)
return result;
const std::wstring::size_type length = quote2 - quote1 - 1;
if (!length)
return result;
// Choose format
result.recommendedFormat = dp ? dp->format(type, iname) : FormatAuto;
// The user did not specify any format, still, there are '?'/'.'
// (indicating non-printable) in what the debugger prints.
// Reformat in this case. If there are no '?'-> all happy.
if (result.recommendedFormat < FormatLatin1String) {
const bool hasNonPrintable = value.find(L'?', quote1 + 1) != std::wstring::npos
|| value.find(L'.', quote1 + 1) != std::wstring::npos;
if (!hasNonPrintable)
return false; // All happy, no need to re-encode
return result; // All happy, no need to re-encode
// Pass as on 8-bit such that Watchmodel's reformatting can trigger.
newFormat = FormatLatin1String;
result.recommendedFormat = result.isWide ?
FormatUtf16String : FormatLatin1String;
}
const std::wstring::size_type quote2 = value->find(L'"', quote1 + 1);
if (quote2 == std::wstring::npos)
return false;
std::wstring::size_type length = quote2 - quote1 - 1;
if (!length)
return false;
// Get address from value
ULONG64 address = 0;
if (!integerFromWString(value->substr(0, quote1 - 1), &address) || !address)
return false;
// Get real size if this is for example a wchar_t *.
const unsigned elementSize = SymbolGroupValue::sizeOf(SymbolGroupValue::stripPointerType(type).c_str());
// Get address from value if it is a pointer.
if (reformatType == ReformatPointer) {
address = 0;
if (!integerFromWString(value.substr(0, quote1 - 1), &address) || !address)
return result;
}
// Get real size (excluding 0) if this is for example a wchar_t *.
// Make fit to 2/4 character boundaries.
const std::string elementType = reformatType == ReformatPointer ?
SymbolGroupValue::stripPointerType(type) :
SymbolGroupValue::stripArrayType(type);
const unsigned elementSize = SymbolGroupValue::sizeOf(elementType.c_str());
if (!elementSize)
return false;
length *= elementSize;
// Allocate real length + 8 bytes ('\0') for largest format (Ucs4).
// '\0' is not listed in the CDB output.
const std::wstring::size_type allocLength = length + 8;
unsigned char *buffer = new unsigned char[allocLength];
std::fill(buffer, buffer + allocLength, 0);
return result;
result.size = length * elementSize;
switch (result.recommendedFormat) {
case FormatUtf16String: // Paranoia: make sure buffer is terminated at 2 byte borders
if (result.size % 2)
result.size &= ~1;
break;
case FormatUcs4String: // Paranoia: make sure buffer is terminated at 4 byte borders
if (result.size % 4)
result.size &= ~3;
break;
}
result.buffer = new unsigned char[result.size];
std::fill(result.buffer, result.buffer + result.size, 0);
ULONG obtained = 0;
if (FAILED(ctx.dataspaces->ReadVirtual(address, buffer, ULONG(length), &obtained))) {
delete [] buffer;
return false;
if (FAILED(ctx.dataspaces->ReadVirtual(address, result.buffer, ULONG(result.size), &obtained))) {
delete [] result.buffer;
result = DumpParameterRecodeResult();
}
return result;
}
bool DumpParameters::recode(const std::string &type,
const std::string &iname,
const SymbolGroupValueContext &ctx,
ULONG64 address,
std::wstring *value, int *encoding) const
{
const DumpParameterRecodeResult check
= checkRecode(type, iname, *value, ctx, address, this);
if (!check.buffer)
return false;
// Recode raw memory
switch (newFormat) {
switch (check.recommendedFormat) {
case FormatLatin1String:
*value = dataToHexW(buffer, buffer + length + 1); // Latin1 + 0
*encoding = DumpEncodingHex_Latin1;
*value = dataToHexW(check.buffer, check.buffer + check.size); // Latin1 + 0
*encoding = DumpEncodingHex_Latin1_WithQuotes;
break;
case FormatUtf8String:
*value = dataToHexW(buffer, buffer + length + 1); // UTF8 + 0
*encoding = DumpEncodingHex_Utf8_LittleEndian;
*value = dataToHexW(check.buffer, check.buffer + check.size); // UTF8 + 0
*encoding = DumpEncodingHex_Utf8_LittleEndian_WithQuotes;
break;
case FormatUtf16String: // Paranoia: make sure buffer is terminated at 2 byte borders
if (length % 2) {
length &= ~1;
buffer[length] = '\0';
buffer[length + 1] = '\0';
}
*value = base64EncodeToWString(buffer, length + 2);
*encoding = DumpEncodingBase64_Utf16;
*value = base64EncodeToWString(check.buffer, check.size);
*encoding = DumpEncodingBase64_Utf16_WithQuotes;
break;
case FormatUcs4String: // Paranoia: make sure buffer is terminated at 4 byte borders
if (length % 4) {
length &= ~3;
std::fill(buffer + length, buffer + length + 4, 0);
}
*value = dataToHexW(buffer, buffer + length + 2); // UTF16 + 0
*encoding = DumpEncodingHex_Ucs4_LittleEndian;
*value = dataToHexW(check.buffer, check.buffer + check.size); // UTF16 + 0
*encoding = DumpEncodingHex_Ucs4_LittleEndian_WithQuotes;
break;
}
delete [] buffer;
delete [] check.buffer;
return true;
}
......@@ -994,7 +1044,8 @@ int SymbolGroupNode::dumpNode(std::ostream &str,
if (addr)
str << ",addr=\"" << std::hex << std::showbase << addr << std::noshowbase << std::dec << '"';
if (const ULONG s = size())
const ULONG s = size();
if (s)
str << ",size=\"" << s << '"';
const bool uninitialized = flags() & Uninitialized;
bool valueEditable = !uninitialized;
......@@ -1003,7 +1054,7 @@ int SymbolGroupNode::dumpNode(std::ostream &str,
// Shall it be recoded?
std::wstring value = simpleDumpValue(ctx);
int encoding = 0;
if (dumpParameters.recode(t, aFullIName, ctx, &value, &encoding)) {
if (dumpParameters.recode(t, aFullIName, ctx, addr, &value, &encoding)) {
str << ",valueencoded=\"" << encoding
<< "\",value=\"" << gdbmiWStringFormat(value) <<'"';
} else { // As is: ASCII or base64?
......
......@@ -49,6 +49,17 @@ class SymbolGroup;
struct SymbolGroupValueContext;
class SymbolGroupNode;
// Helper struct used for check results when recoding CDB char pointer output.
struct DumpParameterRecodeResult
{
    // Default state: no buffer, i.e. recoding is not applicable.
    DumpParameterRecodeResult()
        : buffer(0)
        , size(0)
        , recommendedFormat(0)
        , isWide(false)
    {
    }

    // Raw data (allocated with new []), or 0 if recoding does not apply.
    // The struct has no destructor; whoever receives a non-null buffer
    // releases it with delete [].
    unsigned char *buffer;
    // Size of 'buffer' in bytes.
    size_t size;
    // Format the data should be recoded with.
    int recommendedFormat;
    // Set when the element type is a wide one (wchar_t / unsigned short).
    bool isWide;
};
struct DumpParameters
{
typedef std::map<std::string, int> FormatMap; // type or iname to format
......@@ -63,8 +74,17 @@ struct DumpParameters
// Helper to decode format option arguments.
static FormatMap decodeFormatArgument(const std::string &f);
static DumpParameterRecodeResult
checkRecode(const std::string &type, const std::string &iname,
const std::wstring &value,
const SymbolGroupValueContext &ctx,
ULONG64 address,
const DumpParameters *dp =0);
bool recode(const std::string &type, const std::string &iname,
const SymbolGroupValueContext &ctx,
ULONG64 address,
std::wstring *value, int *encoding) const;
int format(const std::string &type, const std::string &iname) const;
......
......@@ -1394,6 +1394,13 @@ void formatKnownTypeFlags(std::ostream &os, KnownType kt)
os << " simple_dumper";
}
// Check whether the character pointer/array value 'v' should be recoded.
// Forwards type, value, context and address of 'v' to
// DumpParameters::checkRecode() with an empty iname and without a
// DumpParameters instance (the trailing parameter keeps its default of 0).
// A non-null buffer in the result must be released with delete [].
static inline DumpParameterRecodeResult
checkCharArrayRecode(const SymbolGroupValue &v)
{
    const std::string noIName; // no per-iname format lookup is wanted here
    return DumpParameters::checkRecode(v.type(), noIName,
                                       v.value(), v.context(), v.address());
}
// Helper struct containing data Address and size/alloc information
// from Qt's QString/QByteArray.
struct QtStringAddressData
......@@ -1554,9 +1561,15 @@ static inline bool dumpQByteArray(const SymbolGroupValue &v, std::wostream &str)
return false;
// Qt 4.
if (qtInfo.version < 5) {
// TODO: More sophisticated dumping of binary data?
if (const SymbolGroupValue data = dV["data"]) {
str << data.value();
const DumpParameterRecodeResult check =
checkCharArrayRecode(data);
if (check.buffer) {
str << quotedWStringFromCharData(check.buffer, check.size);
delete [] check.buffer;
} else {
str << data.value();
}
return true;
}
return false;
......@@ -2039,7 +2052,17 @@ static bool dumpStd_W_String(const SymbolGroupValue &v, int type, std::wostream
const SymbolGroupValue string = bufSize <= reserved ? bx["_Ptr"] : bx["_Buf"];
if (!string)
return false;
str << string.value();
// Potentially re-code char arrays (preferably relying on
// CDB to initially format the string array).
const DumpParameterRecodeResult recode = checkCharArrayRecode(string);
if (recode.buffer) {
str << (type == KT_StdString ?
quotedWStringFromCharData(recode.buffer, recode.size) :
quotedWStringFromWCharData(recode.buffer, recode.size));
delete [] recode.buffer;
} else {
str << string.value();
}
return true;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment