Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Wireshark - Reassembling TCP streams with large XML in payload (multiple TCP packets)

I am trying to write a custom dissector for following purpose:

There are multiple TCP streams in the network which I want to analyse. The main traffic consists of TCP packets which have a plain XML payload. Some of the XML documents are small enough to fit in a single TCP packet. These can be dissected by Wireshark's TCP/XML dissectors, which works fine.

But I also have XML which are so long that they are stretched over multiple TCP packets. These TCP packets have to be reassembled first to get the whole XML. And that is the main purpose, I want the whole XML as output in a header field.

I have started with the C-String example in the file reassemble.readme of the Wireshark sources. I have a more or less running dissector written. In my test scenarios it worked fine, but if I use captured pcap files wireshark closes without an error message or something. I have no clue why this happens.

Here is the code of the dissector.

#include "config.h"
#include "string.h"
#include <epan/packet.h>
#include <epan/tap.h>
#include <epan/prefs.h>
#include <epan/dissectors/packet-tcp.h>
#include <epan/tvbparse.h>
#include <epan/reassemble.h>
//#include "packet-tcp.h"

/* TCP port the dissector is attached to in proto_reg_handoff_rstxml(). */
#define SOME_PORT 1337
/* When defined, the g_print() trace output guarded by `#ifdef debug` is compiled in. */
#define debug TRUE

/* Port-range string handed to dissector_add_uint_range_with_preference();
 * the visible code never assigns it, so it keeps its zero (NULL) initial value. */
static const char *nicerange;
/* Protocol id; set from proto_register_protocol() in proto_register_rstxml(). */
static int proto_rst_xml = -1;

/* File-scope bookkeeping variables. Being file-scope statics there is exactly
 * one instance of each, shared by every packet and every TCP stream - they are
 * NOT stored per conversation. */
static int tvb_old_length = 0;
static char *xml_header;
static int old_frame_number = -1;

/* Header-field (hf) ids; both are listed in the hf[] array that
 * proto_register_rstxml() passes to proto_register_field_array(). */
static int hf_xml_end = -1;
static int hf_cstring = -1;
/* Another file-scope flag shared by all streams (see note above). */
static gboolean search_for_xml_header = TRUE;


static int
dissect_rst_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree _U_, void *data _U_)
{
    static char search_end[] = ""; 
    guint offset = 0;
    char search_begin[] = "<?xml";

    gboolean xml_finished = FALSE;
    const guint8 *tvb_string;

    #ifdef debug
    g_print("\n\nFRAME: %i\n", pinfo->fd->num);
    #endif

    char *search_end_ptr;
    tvb_string = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, tvb_reported_length(tvb), ENC_UTF_8);



    if (strstr(tvb_string, search_begin) == NULL)
    {
        #ifdef debug
        g_print("%i no xmlbegin: \n", pinfo->fd->num);
        #endif
        tvb_old_length = 0;
        return tvb_captured_length(tvb);
    }

    #ifdef debug
    g_print("%i is xml: \n", pinfo->fd->num);
    g_print("search for xml header %i \n",search_for_xml_header);
    #endif

    if (search_for_xml_header==TRUE && old_frame_number != pinfo->fd->num){

        search_for_xml_header = FALSE;
        char tvb_string_copy[20000];
        memset(tvb_string_copy,0,strlen(tvb_string_copy));
        strcpy(tvb_string_copy,tvb_string);

        memset(search_end,0,strlen(search_end));
        strcpy(search_end,"</");
        char delimiter[] = "<>";

        // get first token
        search_end_ptr = strtok(tvb_string_copy, delimiter);
        guint i=0;

        //Opening XML-Tag is in the second token
        while((search_end_ptr != NULL) && (i<1)){    
            g_print("%s \n", search_end_ptr);
            search_end_ptr = strtok(NULL,delimiter);

            if(i == 0){

                strcat(search_end,search_end_ptr);
                strtok(search_end, " ");
                g_print("XML-ENDE-TAG: %s \n", search_end);
            }
            i++;
        }


        g_print("XML-END-TAG: %s \n", search_end);

        #ifdef debug
        g_print("copy of tvb_string: %s \n",tvb_string_copy);
        #endif
    }

    old_frame_number = pinfo->fd->num;


    while (offset < tvb_reported_length(tvb))
        {
            gint available = tvb_reported_length_remaining(tvb, offset);
            gint len = tvb_strnlen(tvb, offset, available);

            tvb_string = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, available, ENC_UTF_8);

            //g_print(tvb_string);
            if (strstr(tvb_string, search_end) != NULL)
            {
                #ifdef debug
                g_print("xml finnsihed tag found %s \n", search_end);
                #endif
                xml_finished = TRUE;
            }


            if(xml_finished == FALSE)
            {
                /* we ran out of data: ask for more */
                pinfo->desegment_offset = offset;
                pinfo->desegment_len = DESEGMENT_ONE_MORE_SEGMENT;

                #ifdef debug
                g_print("Waypoint: xml not finnished\n");
                #endif

                return (offset + available);
            }

            col_set_str(pinfo->cinfo, COL_INFO, "RSTXML String");

            if (tree)
            {
                proto_tree_add_item(tree, hf_cstring, tvb, offset, available, ENC_ASCII | ENC_NA);

            }

            offset += (guint)available;
        }

        /* if we get here, then the end of the tvb coincided with the end of a
        string. Happy days. */
        search_for_xml_header = TRUE;

        return tvb_captured_length(tvb);
}

/*
 * Register the RSTXML protocol, its two string header fields and its
 * preference module.
 */
void proto_register_rstxml(void)
{
    static hf_register_info rstxml_fields[] = {
        { &hf_cstring,
          { "RSTXML String", "rstxml.string",
            FT_STRING, BASE_NONE,
            NULL, 0x0,
            NULL, HFILL } },
        { &hf_xml_end,
          { "RSTXML ", "rstxml.xmlendtag",
            FT_STRING, BASE_NONE,
            NULL, 0x0,
            NULL, HFILL } }
    };

    /* Arguments: name, short name, filter name. */
    proto_rst_xml = proto_register_protocol("RSTXML", "RSTXML", "rstxml");

    proto_register_field_array(proto_rst_xml, rstxml_fields, array_length(rstxml_fields));

    /* The returned module handle is not used by this function. */
    prefs_register_protocol(proto_rst_xml, NULL);
}

/*
 * Create the dissector handle for dissect_rst_xml() and attach it to the
 * "tcp.port" table twice: once for the fixed SOME_PORT and once through the
 * range preference seeded with `nicerange`.
 */
void proto_reg_handoff_rstxml(void)
{
    static dissector_handle_t handle;

    handle = create_dissector_handle(dissect_rst_xml, proto_rst_xml);

    dissector_add_uint("tcp.port", SOME_PORT, handle);
    dissector_add_uint_range_with_preference("tcp.port", nicerange, handle);
}

Here are some problems which I want to describe:

Because the TCP stream never ends, TCP reassembly won't work properly (I think this is because there is a persistent connection between the communication partners). This means that after one XML document is transmitted there is a shorter or longer pause, and then another XML document is transmitted over the same connection.

I also had the problem that the search string search_end (the closing XML tag) is not properly saved for the next run of the dissector. I tried to make it a global static variable, but this only works if there is a single TCP stream. If there are overlapping streams, the search_end string is overwritten by the newest TCP stream and everything becomes a big mess. Therefore I have moved this string into the "static int dissect_rst_xml" function. But I think this won't do the job either. Is there a way to hand over such information to the next run of the dissector? I have no clue how to access the void *data argument of the function; maybe this could be a possibility?

My approach is to check whether a TCP packet contains a <?xml string (using strstr). If it does, I try to determine the first opening XML tag by using strtok to split a copy of the tvb string, in order to get the right tag. As far as I can see this mechanism works fine. But I think my main problem is preserving the gathered information for the next run.

I hope someone can help me figure out why Wireshark is crashing, and where is my mistake in thinking.

Thank you and have a great day!

ElrondMcBong

Wireshark - Reassembling TCP streams with large XML in payload (multiple TCP packets)

Hello community,

I am trying to write a custom dissector for following purpose:

There are multiple TCP streams in the network which I want to analyse. The main traffic consists of TCP packets which have a plain XML payload. Some of the XML documents are small enough to fit in a single TCP packet. These can be dissected by Wireshark's TCP/XML dissectors, which works fine.

But I also have XML which are so long that they are stretched over multiple TCP packets. These TCP packets have to be reassembled first to get the whole XML. And that is the main purpose, I want the whole XML as output in a header field.

I have started with the C-String example in the file reassemble.readme of the Wireshark sources. I have a more or less running dissector written. In my test scenarios it worked fine, but if I use captured pcap files wireshark closes without an error message or something. I have no clue why this happens.

Here is the code of the dissector.

#include "config.h"
#include "string.h"
#include <epan/packet.h>
#include <epan/tap.h>
#include <epan/prefs.h>
#include <epan/dissectors/packet-tcp.h>
#include <epan/tvbparse.h>
#include <epan/reassemble.h>
//#include "packet-tcp.h"

/* TCP port the dissector is attached to in proto_reg_handoff_rstxml(). */
#define SOME_PORT 1337
/* When defined, the g_print() trace output guarded by `#ifdef debug` is compiled in. */
#define debug TRUE

/* Port-range string handed to dissector_add_uint_range_with_preference();
 * the visible code never assigns it, so it keeps its zero (NULL) initial value. */
static const char *nicerange;
/* Protocol id; set from proto_register_protocol() in proto_register_rstxml(). */
static int proto_rst_xml = -1;

/* File-scope bookkeeping variables. Being file-scope statics there is exactly
 * one instance of each, shared by every packet and every TCP stream - they are
 * NOT stored per conversation. */
static int tvb_old_length = 0;
static char *xml_header;
static int old_frame_number = -1;

/* Header-field (hf) ids; both are listed in the hf[] array that
 * proto_register_rstxml() passes to proto_register_field_array(). */
static int hf_xml_end = -1;
static int hf_cstring = -1;
/* Another file-scope flag shared by all streams (see note above). */
static gboolean search_for_xml_header = TRUE;


static int
dissect_rst_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree _U_, void *data _U_)
{
    static char search_end[] = ""; 
    guint offset = 0;
    char search_begin[] = "<?xml";

    gboolean xml_finished = FALSE;
    const guint8 *tvb_string;

    #ifdef debug
    g_print("\n\nFRAME: %i\n", pinfo->fd->num);
    #endif

    char *search_end_ptr;
    tvb_string = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, tvb_reported_length(tvb), ENC_UTF_8);



    if (strstr(tvb_string, search_begin) == NULL)
    {
        #ifdef debug
        g_print("%i no xmlbegin: \n", pinfo->fd->num);
        #endif
        tvb_old_length = 0;
        return tvb_captured_length(tvb);
    }

    #ifdef debug
    g_print("%i is xml: \n", pinfo->fd->num);
    g_print("search for xml header %i \n",search_for_xml_header);
    #endif

    if (search_for_xml_header==TRUE && old_frame_number != pinfo->fd->num){

        search_for_xml_header = FALSE;
        char tvb_string_copy[20000];
        memset(tvb_string_copy,0,strlen(tvb_string_copy));
        strcpy(tvb_string_copy,tvb_string);

        memset(search_end,0,strlen(search_end));
        strcpy(search_end,"</");
        char delimiter[] = "<>";

        // get first token
        search_end_ptr = strtok(tvb_string_copy, delimiter);
        guint i=0;

        //Opening XML-Tag is in the second token
        while((search_end_ptr != NULL) && (i<1)){    
            g_print("%s \n", search_end_ptr);
            search_end_ptr = strtok(NULL,delimiter);

            if(i == 0){

                strcat(search_end,search_end_ptr);
                strtok(search_end, " ");
                g_print("XML-ENDE-TAG: %s \n", search_end);
            }
            i++;
        }


        g_print("XML-END-TAG: %s \n", search_end);

        #ifdef debug
        g_print("copy of tvb_string: %s \n",tvb_string_copy);
        #endif
    }

    old_frame_number = pinfo->fd->num;


    while (offset < tvb_reported_length(tvb))
        {
            gint available = tvb_reported_length_remaining(tvb, offset);
            gint len = tvb_strnlen(tvb, offset, available);

            tvb_string = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, available, ENC_UTF_8);

            //g_print(tvb_string);
            if (strstr(tvb_string, search_end) != NULL)
            {
                #ifdef debug
                g_print("xml finnsihed tag found %s \n", search_end);
                #endif
                xml_finished = TRUE;
            }


            if(xml_finished == FALSE)
            {
                /* we ran out of data: ask for more */
                pinfo->desegment_offset = offset;
                pinfo->desegment_len = DESEGMENT_ONE_MORE_SEGMENT;

                #ifdef debug
                g_print("Waypoint: xml not finnished\n");
                #endif

                return (offset + available);
            }

            col_set_str(pinfo->cinfo, COL_INFO, "RSTXML String");

            if (tree)
            {
                proto_tree_add_item(tree, hf_cstring, tvb, offset, available, ENC_ASCII | ENC_NA);

            }

            offset += (guint)available;
        }

        /* if we get here, then the end of the tvb coincided with the end of a
        string. Happy days. */
        search_for_xml_header = TRUE;

        return tvb_captured_length(tvb);
}

/*
 * Register the RSTXML protocol, its two string header fields and its
 * preference module.
 */
void proto_register_rstxml(void)
{
    static hf_register_info rstxml_fields[] = {
        { &hf_cstring,
          { "RSTXML String", "rstxml.string",
            FT_STRING, BASE_NONE,
            NULL, 0x0,
            NULL, HFILL } },
        { &hf_xml_end,
          { "RSTXML ", "rstxml.xmlendtag",
            FT_STRING, BASE_NONE,
            NULL, 0x0,
            NULL, HFILL } }
    };

    /* Arguments: name, short name, filter name. */
    proto_rst_xml = proto_register_protocol("RSTXML", "RSTXML", "rstxml");

    proto_register_field_array(proto_rst_xml, rstxml_fields, array_length(rstxml_fields));

    /* The returned module handle is not used by this function. */
    prefs_register_protocol(proto_rst_xml, NULL);
}

/*
 * Create the dissector handle for dissect_rst_xml() and attach it to the
 * "tcp.port" table twice: once for the fixed SOME_PORT and once through the
 * range preference seeded with `nicerange`.
 */
void proto_reg_handoff_rstxml(void)
{
    static dissector_handle_t handle;

    handle = create_dissector_handle(dissect_rst_xml, proto_rst_xml);

    dissector_add_uint("tcp.port", SOME_PORT, handle);
    dissector_add_uint_range_with_preference("tcp.port", nicerange, handle);
}

Here are some problems which I want to describe:

Because the TCP stream never ends, TCP reassembly won't work properly (I think this is because there is a persistent connection between the communication partners). This means that after one XML document is transmitted there is a shorter or longer pause, and then another XML document is transmitted over the same connection.

I also had the problem that the search string search_end (the closing XML tag) is not properly saved for the next run of the dissector. I tried to make it a global static variable, but this only works if there is a single TCP stream. If there are overlapping streams, the search_end string is overwritten by the newest TCP stream and everything becomes a big mess. Therefore I have moved this string into the "static int dissect_rst_xml" function. But I think this won't do the job either. Is there a way to hand over such information to the next run of the dissector? I have no clue how to access the void *data argument of the function; maybe this could be a possibility?

My approach is to check whether a TCP packet contains a <?xml string (using strstr). If it does, I try to determine the first opening XML tag by using strtok to split a copy of the tvb string, in order to get the right tag. As far as I can see this mechanism works fine. But I think my main problem is preserving the gathered information for the next run.

I hope someone can help me figure out why Wireshark is crashing, and where is my mistake in thinking.

Thank you and have a great day!

ElrondMcBong