Wie man C++-Structs mit vorhergehenden Kommentarzeilen aus Quellcode mit clang parst

Hinweis: Verwandte Posts:

In unseren vorherigen Posts (siehe oben) haben wir gezeigt, wie man C++-struct-Definitionen aus Quellcode mit dem Clang-C++-Parser extrahiert. In diesem Post erweitern wir diese Funktionalität um vorhergehende Kommentarzeilen.

Das folgende modifizierte Code-Snippet demonstriert, wie man C++-Structs parst und ihre Felder extrahiert, einschließlich der dem Struct vorhergehenden Kommentarzeile. Derzeit wird nur ein einzeiliger vorhergehender Kommentar erkannt; der Code lässt sich jedoch so erweitern, dass auch mehrzeilige Kommentare behandelt werden.

Beispieldaten

example_structs.cpp
/* This struct defines my parameters */
typedef struct {
    double a; /* That weird parameter */
    double b; /* Another weird parameter */
} myParameters;

typedef struct {
    double x; /* The x coordinate */
    double y; /* The y coordinate */
} myPoint;

// This struct is used to pass parameters to a function
typedef struct {
    myParameters params; /* Parameters for the function */
    myPoint point;       /* Point to evaluate */
} myFunctionInput;
parse_struct_with_comments.cpp
#include <clang-c/Index.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <fstream>
#include <rapidjson/document.h>
#include <rapidjson/writer.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/prettywriter.h>

// Returns the comment that trails a declaration on its last source line.
//
// cursor     - libclang cursor of the declaration (used here for struct fields).
// sourceCode - NUL-terminated text of the translation unit's main file; the
//              line/column reported by libclang is used to index into it.
//
// Returns the comment text including its delimiters ("/* ... */" or "// ..."),
// or an empty string when sourceCode is null, the cursor is not located in the
// main file, or no comment follows the declaration on that line. Block
// comments that are not closed on the same line are not supported.
std::string extractInlineComment(CXCursor cursor, const char* sourceCode) {
    if (sourceCode == nullptr) {
        return "";
    }

    // A trailing comment follows the *end* of the declaration, so only the end
    // location matters (start and end differ for multi-line declarations).
    const CXSourceLocation end = clang_getRangeEnd(clang_getCursorExtent(cursor));

    // Line numbers of declarations coming from included files do not refer to
    // sourceCode; looking them up there would return unrelated text.
    if (!clang_Location_isFromMainFile(end)) {
        return "";
    }

    unsigned endLine = 0;
    unsigned endColumn = 0;
    clang_getExpansionLocation(end, nullptr, &endLine, &endColumn, nullptr);
    if (endLine == 0) {
        return "";  // invalid location
    }

    // Advance to the first character of line `endLine` (lines are 1-based).
    const char* lineStart = sourceCode;
    for (unsigned currentLine = 1; currentLine < endLine; ++currentLine) {
        const char* newline = std::strchr(lineStart, '\n');
        if (newline == nullptr) {
            return "";  // the text has fewer lines than libclang reported
        }
        lineStart = newline + 1;
    }

    const char* lineEnd = std::strchr(lineStart, '\n');
    std::string line = (lineEnd != nullptr) ? std::string(lineStart, lineEnd)
                                            : std::string(lineStart);

    // The file may have CRLF line endings; keep the '\r' out of the result.
    if (!line.empty() && line.back() == '\r') {
        line.pop_back();
    }

    // Only look behind the declaration: a comment in front of it (or one that
    // belongs to an earlier declaration on the same line) is not ours.
    // libclang columns are 1-based.
    size_t searchFrom = (endColumn > 0) ? endColumn - 1 : 0;
    if (searchFrom > line.size()) {
        searchFrom = line.size();
    }

    const size_t blockStart = line.find("/*", searchFrom);
    const size_t lineCommentStart = line.find("//", searchFrom);

    // Whichever comment opener comes first decides the comment style, so that
    // "// see /* x */" and "/* see http://x */" are both reported in full.
    const bool blockComesFirst =
        blockStart != std::string::npos &&
        (lineCommentStart == std::string::npos || blockStart < lineCommentStart);

    if (blockComesFirst) {
        // Start behind the opener so that "/*/" is not taken for a closed comment.
        const size_t blockEnd = line.find("*/", blockStart + 2);
        if (blockEnd == std::string::npos) {
            return "";  // comment continues on the next line: not supported
        }
        return line.substr(blockStart, blockEnd - blockStart + 2);
    }

    if (lineCommentStart != std::string::npos) {
        return line.substr(lineCommentStart);
    }

    return "";
}

// Returns the single-line comment on the line directly above a declaration.
//
// cursor     - libclang cursor of the declaration (used here for structs).
// sourceCode - NUL-terminated text of the translation unit's main file; the
//              line number reported by libclang is used to index into it.
//
// Returns the preceding line with surrounding whitespace removed if that line
// consists only of comments ("// ..." or one or more closed "/* ... */"
// blocks, optionally followed by "// ..."). Returns an empty string when
// sourceCode is null, the cursor is not located in the main file, the
// declaration starts on the first line, or the preceding line is blank or
// contains code. Multi-line comments are not supported.
std::string extractPrecedingComment(CXCursor cursor, const char* sourceCode) {
    if (sourceCode == nullptr) {
        return "";
    }

    const CXSourceLocation start = clang_getRangeStart(clang_getCursorExtent(cursor));

    // Line numbers of declarations coming from included files do not refer to
    // sourceCode; looking them up there would return unrelated text.
    if (!clang_Location_isFromMainFile(start)) {
        return "";
    }

    unsigned startLine = 0;
    clang_getExpansionLocation(start, nullptr, &startLine, nullptr, nullptr);

    // Line 1 (or an invalid location, reported as 0) has no preceding line.
    if (startLine <= 1) {
        return "";
    }

    // Advance to the first character of line `startLine - 1` (1-based).
    const char* lineStart = sourceCode;
    for (unsigned currentLine = 1; currentLine + 1 < startLine; ++currentLine) {
        const char* newline = std::strchr(lineStart, '\n');
        if (newline == nullptr) {
            return "";  // the text has fewer lines than libclang reported
        }
        lineStart = newline + 1;
    }

    const char* lineEnd = std::strchr(lineStart, '\n');
    const std::string rawLine = (lineEnd != nullptr) ? std::string(lineStart, lineEnd)
                                                     : std::string(lineStart);

    // Strip indentation and trailing blanks; '\r' covers CRLF line endings.
    const char* const whitespace = " \t\r";
    const size_t first = rawLine.find_first_not_of(whitespace);
    if (first == std::string::npos) {
        return "";  // blank line
    }
    const size_t last = rawLine.find_last_not_of(whitespace);
    const std::string line = rawLine.substr(first, last - first + 1);

    // Accept the line only if it is made up of comments from start to end, so
    // that e.g. "/* x */ int y;" is not reported as the struct's comment.
    size_t pos = 0;
    while (pos != std::string::npos && pos < line.size()) {
        if (line.compare(pos, 2, "//") == 0) {
            return line;  // a line comment swallows the rest of the line
        }
        if (line.compare(pos, 2, "/*") != 0) {
            return "";  // something other than a comment
        }
        // Start behind the opener so that "/*/" is not taken for a closed comment.
        const size_t blockEnd = line.find("*/", pos + 2);
        if (blockEnd == std::string::npos) {
            return "";  // comment continues on the next line: not supported
        }
        pos = line.find_first_not_of(" \t", blockEnd + 2);
    }

    return line;
}

// Context handed to the field visitor in extractStructFields() through
// libclang's CXClientData pointer. All members are non-owning.
// NOTE: the struct is brace-initialised positionally, so the member order
// must not change.
struct VisitorData {
    // Source text that is passed on to extractInlineComment() for each field.
    const char* sourceCode;
    // JSON array that receives one object per visited field.
    rapidjson::Value* fieldsArray;
    // Allocator used for every JSON value created by the visitor.
    rapidjson::Document::AllocatorType* allocator;
};

// Adds a JSON description of a struct definition to `doc`.
//
// cursor     - cursor to inspect; anything that is not the definition of a
//              struct is ignored.
// sourceCode - source text passed on to extractPrecedingComment() and
//              extractInlineComment() for comment lookup.
// doc        - receives one member per struct, keyed by the struct's display
//              name (AddMember is called on it, so it is expected to already
//              be a JSON object):
//                { "fields": [ { "name", "type", "comment" }, ... ],
//                  "precedingComment": "..." }   // only if one was found
//
// Only direct field declarations of the struct are listed.
void extractStructFields(CXCursor cursor, const char* sourceCode, rapidjson::Document& doc) {
    if (clang_getCursorKind(cursor) != CXCursor_StructDecl) {
        return;
    }

    // Forward declarations ("struct Foo;") have no fields and would add a
    // second, empty member with the same key as the real definition.
    if (!clang_isCursorDefinition(cursor)) {
        return;
    }

    // Copy the name right away so the CXString can be released immediately.
    const CXString structName = clang_getCursorDisplayName(cursor);
    const char* structNameCStr = clang_getCString(structName);
    const std::string structNameStr = (structNameCStr != nullptr) ? structNameCStr : "";
    clang_disposeString(structName);

    const std::string precedingComment = extractPrecedingComment(cursor, sourceCode);

    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
    rapidjson::Value structObj(rapidjson::kObjectType);
    rapidjson::Value fieldsArray(rapidjson::kArrayType);

    VisitorData data = {sourceCode, &fieldsArray, &allocator};

    // The lambda captures nothing so that it converts to the plain function
    // pointer libclang expects; its context travels through `client_data`.
    clang_visitChildren(cursor, [](CXCursor c, CXCursor /*parent*/, CXClientData client_data) {
        if (clang_getCursorKind(c) != CXCursor_FieldDecl) {
            return CXChildVisit_Continue;
        }

        VisitorData* visitorData = static_cast<VisitorData*>(client_data);
        rapidjson::Document::AllocatorType& alloc = *visitorData->allocator;

        const CXString fieldName = clang_getCursorDisplayName(c);
        const CXString typeName = clang_getTypeSpelling(clang_getCursorType(c));
        const char* fieldNameCStr = clang_getCString(fieldName);
        const char* typeNameCStr = clang_getCString(typeName);
        const std::string comment = extractInlineComment(c, visitorData->sourceCode);

        // The (const char*, allocator) constructor copies the characters, so
        // the CXStrings can be disposed once the values have been created.
        rapidjson::Value nameVal(fieldNameCStr != nullptr ? fieldNameCStr : "", alloc);
        rapidjson::Value typeVal(typeNameCStr != nullptr ? typeNameCStr : "", alloc);
        rapidjson::Value commentVal(comment.c_str(), alloc);

        clang_disposeString(fieldName);
        clang_disposeString(typeName);

        rapidjson::Value fieldObj(rapidjson::kObjectType);
        fieldObj.AddMember("name", nameVal, alloc);
        fieldObj.AddMember("type", typeVal, alloc);
        fieldObj.AddMember("comment", commentVal, alloc);

        visitorData->fieldsArray->PushBack(fieldObj, alloc);
        return CXChildVisit_Continue;
    }, &data);

    // "fields" is added before "precedingComment" to keep the member order of
    // the generated JSON stable.
    structObj.AddMember("fields", fieldsArray, allocator);
    if (!precedingComment.empty()) {
        rapidjson::Value precedingCommentVal(precedingComment.c_str(), allocator);
        structObj.AddMember("precedingComment", precedingCommentVal, allocator);
    }

    rapidjson::Value structNameVal(structNameStr.c_str(), allocator);
    doc.AddMember(structNameVal, structObj, allocator);
}

// Entry point: parses the file given as the only command line argument with
// libclang and prints a JSON object describing every struct found at the top
// level of the translation unit (fields, inline comments, preceding comment).
//
// Returns 0 on success and 1 on a usage error, when the file cannot be read,
// or when libclang fails to parse it.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        // argv[0] is not guaranteed to exist when argc is 0.
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "parse_struct_with_comments")
                  << " <filename>" << std::endl;
        return 1;
    }

    const char* filename = argv[1];

    // Read the source text first: it needs no libclang resources, so a
    // failure here can return without anything to clean up. Binary mode keeps
    // the bytes exactly as libclang sees them when counting lines/columns.
    std::ifstream file(filename, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file: " << filename << std::endl;
        return 1;
    }

    file.seekg(0, std::ios::end);
    const std::streamoff fileSize = file.tellg();
    file.seekg(0, std::ios::beg);
    // tellg() reports -1 on failure; converting that to size_t would request
    // an enormous allocation.
    if (!file || fileSize < 0) {
        std::cerr << "Failed to read file: " << filename << std::endl;
        return 1;
    }

    std::string sourceCode(static_cast<size_t>(fileSize), '\0');
    if (fileSize > 0 && !file.read(&sourceCode[0], static_cast<std::streamsize>(fileSize))) {
        std::cerr << "Failed to read file: " << filename << std::endl;
        return 1;
    }
    file.close();

    CXIndex index = clang_createIndex(0, 0);

    CXTranslationUnit unit = clang_parseTranslationUnit(index, filename, nullptr, 0, nullptr, 0,
                                                    CXTranslationUnit_DetailedPreprocessingRecord |
                                                    CXTranslationUnit_SkipFunctionBodies);
    if (unit == nullptr) {
        std::cerr << "Failed to parse translation unit." << std::endl;
        clang_disposeIndex(index);
        return 1;
    }

    // Create JSON document; extractStructFields() adds one member per struct.
    rapidjson::Document doc;
    doc.SetObject();

    // Context for the top-level visitor (passed through CXClientData).
    struct CallbackData {
        rapidjson::Document* doc;
        const char* sourceCode;
    };

    CallbackData callbackData = {&doc, sourceCode.c_str()};

    // Only the direct children of the translation unit are inspected;
    // CXChildVisit_Continue does not descend into them.
    CXCursor cursor = clang_getTranslationUnitCursor(unit);
    clang_visitChildren(cursor, [](CXCursor c, CXCursor /*parent*/, CXClientData client_data) {
        CallbackData* data = static_cast<CallbackData*>(client_data);
        extractStructFields(c, data->sourceCode, *(data->doc));
        return CXChildVisit_Continue;
    }, &callbackData);

    // Convert JSON to a compact string and output it
    rapidjson::StringBuffer buffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
    doc.Accept(writer);

    std::cout << buffer.GetString() << std::endl;

    clang_disposeTranslationUnit(unit);
    clang_disposeIndex(index);
    return 0;
}

Wie man kompiliert

Installieren Sie auf Ubuntu die erforderlichen Bibliotheken mit

install_libclang19.sh
sudo apt -y install libclang-19-dev

und kompilieren Sie den Code mit

build_parse_struct.sh
g++ parse_struct_with_comments.cpp -o parse_struct_with_comments -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang

Testlauf

Laden Sie dies als test_struct.h herunter:

test_struct.h
/* This struct defines my parameters */
typedef struct {
    double a; /* That weird parameter */
    double b; /* Another weird parameter */
} myParameters;

typedef struct {
    double x; /* The x coordinate */
    double y; /* The y coordinate */
} myPoint;

// This struct is used to pass parameters to a function
typedef struct {
    myParameters params; /* Parameters for the function */
    myPoint point;       /* Point to evaluate */
} myFunctionInput;

Führen Sie dann das Programm aus:

run_parse_struct_test.sh
./parse_struct_with_comments test_struct.h | jq

Beispiel-Ausgabe

parse_struct_output.json
{
    "myParameters": {
        "fields": [
            {
                "name": "a",
                "type": "double",
                "comment": "/* That weird parameter */"
            },
            {
                "name": "b",
                "type": "double",
                "comment": "/* Another weird parameter */"
            }
        ],
        "precedingComment": "/* This struct defines my parameters */"
    },
    "myPoint": {
        "fields": [
            {
                "name": "x",
                "type": "double",
                "comment": "/* The x coordinate */"
            },
            {
                "name": "y",
                "type": "double",
                "comment": "/* The y coordinate */"
            }
        ]
    },
    "myFunctionInput": {
        "fields": [
            {
                "name": "params",
                "type": "myParameters",
                "comment": "/* Parameters for the function */"
            },
            {
                "name": "point",
                "type": "myPoint",
                "comment": "/* Point to evaluate */"
            }
        ],
        "precedingComment": "// This struct is used to pass parameters to a function"
    }
}

Check out similar posts by category: C/C++, Clang, Source Introspection