dmp文件读取(二)- Thread


| 阅读 |,阅读约 5 分钟
| 复制链接:

Overview

dmp文件解析

dmp文件读取(二)

前面的章节一直提到,dmp中有很多种不同的stream,分别代表不同维度的信息,比如:操作系统信息、线程信息、异常信息、模块信息等。

breakpad中的流的表示

枚举类 MDStreamType 表示所有的流,源码位置:src/google_breakpad/common/minidump_format.h

 1typedef enum {
 2  MD_UNUSED_STREAM               =  0,
 3  MD_RESERVED_STREAM_0           =  1,
 4  MD_RESERVED_STREAM_1           =  2,
 5  MD_THREAD_LIST_STREAM          =  3,  /* MDRawThreadList */
 6  MD_MODULE_LIST_STREAM          =  4,  /* MDRawModuleList */
 7  MD_MEMORY_LIST_STREAM          =  5,  /* MDRawMemoryList */
 8  MD_EXCEPTION_STREAM            =  6,  /* MDRawExceptionStream */
 9  MD_SYSTEM_INFO_STREAM          =  7,  /* MDRawSystemInfo */
10  MD_THREAD_EX_LIST_STREAM       =  8,
11  MD_MEMORY_64_LIST_STREAM       =  9,
12  MD_COMMENT_STREAM_A            = 10,
13  MD_COMMENT_STREAM_W            = 11,
14  MD_HANDLE_DATA_STREAM          = 12,
15  MD_FUNCTION_TABLE_STREAM       = 13,
16  MD_UNLOADED_MODULE_LIST_STREAM = 14,
17  MD_MISC_INFO_STREAM            = 15,  /* MDRawMiscInfo */
18  MD_MEMORY_INFO_LIST_STREAM     = 16,  /* MDRawMemoryInfoList */
19  MD_THREAD_INFO_LIST_STREAM     = 17,
20  MD_HANDLE_OPERATION_LIST_STREAM = 18,
21  MD_TOKEN_STREAM                = 19,
22  MD_JAVASCRIPT_DATA_STREAM      = 20,
23  MD_SYSTEM_MEMORY_INFO_STREAM   = 21,
24  MD_PROCESS_VM_COUNTERS_STREAM  = 22,
25  MD_LAST_RESERVED_STREAM        = 0x0000ffff,
26
27  /* Breakpad extension types.  0x4767 = "Gg" */
28  MD_BREAKPAD_INFO_STREAM        = 0x47670001,  /* MDRawBreakpadInfo  */
29  MD_ASSERTION_INFO_STREAM       = 0x47670002,  /* MDRawAssertionInfo */
30  /* These are additional minidump stream values which are specific to
31   * the linux breakpad implementation. */
32  MD_LINUX_CPU_INFO              = 0x47670003,  /* /proc/cpuinfo      */
33  MD_LINUX_PROC_STATUS           = 0x47670004,  /* /proc/$x/status    */
34  MD_LINUX_LSB_RELEASE           = 0x47670005,  /* /etc/lsb-release   */
35  MD_LINUX_CMD_LINE              = 0x47670006,  /* /proc/$x/cmdline   */
36  MD_LINUX_ENVIRON               = 0x47670007,  /* /proc/$x/environ   */
37  MD_LINUX_AUXV                  = 0x47670008,  /* /proc/$x/auxv      */
38  MD_LINUX_MAPS                  = 0x47670009,  /* /proc/$x/maps      */
39  MD_LINUX_DSO_DEBUG             = 0x4767000A   /* MDRawDebug{32,64}  */
40} MDStreamType;  /* MINIDUMP_STREAM_TYPE */
41

breakpad中流的读取

每种stream都有一个特定的类来表示,它们都有一个公共的基类MinidumpStream。这是一个抽象基类,声明了一个纯虚方法Read(即 virtual bool Read(uint32_t expected_size) = 0),用来约定每种流的读取方法,具体的读取逻辑由各个子类实现。源码位置:src/google_breakpad/processor/minidump.h

MinidumpStream

 1class MinidumpStream : public MinidumpObject {
 2 public:
 3  virtual ~MinidumpStream() {}
 4
 5 protected:
 6  explicit MinidumpStream(Minidump* minidump);
 7
 8 private:
 9  // Populate (and validate) the MinidumpStream.  minidump_ is expected
10  // to be positioned at the beginning of the stream, so that the next
11  // read from the minidump will be at the beginning of the stream.
12  // expected_size should be set to the stream's length as contained in
13  // the MDRawDirectory record or other identifying record.  A class
14  // that implements MinidumpStream can compare expected_size to a
15  // known size as an integrity check.
16  virtual bool Read(uint32_t expected_size) = 0;
17
18  DISALLOW_COPY_AND_ASSIGN(MinidumpStream);
19};

Stream的读取实现

每种stream有一个实现类,它们都有一些共性

  • 继承自基类 MinidumpStream
  • 重写基类的 Read 方法
  • 静态常量成员 kStreamType 指定了该stream对应的枚举值

常用的几个stream实现类如下,源码位置:src/google_breakpad/processor/minidump.h,前一篇文章的xmind图也有stream和枚举值的对应关系

  • MinidumpThreadList
  • MinidumpModuleList
  • MinidumpMemoryList
  • MinidumpException
  • MinidumpAssertion
  • MinidumpSystemInfo
  • MinidumpUnloadedModule
  • MinidumpUnloadedModuleList
  • MinidumpMiscInfo
  • MinidumpBreakpadInfo
  • MinidumpMemoryInfoList

线程stream读取调用的入口

上一篇文章我们提到过

1ProcessResult MinidumpProcessor::Process(
2    Minidump *dump, ProcessState *process_state) {
3    ...
4    // 线程stream读取的入口,读取的线程列表信息保存在MinidumpThreadList中
5    MinidumpThreadList *threads = dump->GetThreadList();
6    ...
7}

实现类 MinidumpThreadList,源码位置:src/google_breakpad/processor/minidump.h

线程stream的实现

 1/* 读取线程stream信息,保存到该对象中
 2*   1. 线程信息是一个列表,包含该进程运行的所有线程信息
 3*   2. 有两个成员变量用来保存线程,vector数组类型和map类型(以线程id为key)
 4*   3. 重写基类的Read方法,实现自己流的读取业务逻辑
 5*   4. kStreamType 指定了该stream对应的枚举值
 6*   5. 每个线程的信息保存在 MinidumpThread 对象中
 7*
 8*/
 9class MinidumpThreadList : public MinidumpStream {
10 public:
11  virtual ~MinidumpThreadList();
12
13  // 设置/获取从minidump中最多读取的线程数(静态成员 max_threads_)
14  static void set_max_threads(uint32_t max_threads) {
15    max_threads_ = max_threads;
16  }
17  static uint32_t max_threads() { return max_threads_; }
18
19  virtual unsigned int thread_count() const {
20    return valid_ ? thread_count_ : 0;
21  }
22
23  // Sequential access to threads.
24  virtual MinidumpThread* GetThreadAtIndex(unsigned int index) const;
25
26  // Random access to threads.
27  MinidumpThread* GetThreadByID(uint32_t thread_id);
28
29  // Print a human-readable representation of the object to stdout.
30  void Print();
31
32 protected:
33  explicit MinidumpThreadList(Minidump* aMinidump);
34
35 private:
36  friend class Minidump;
37
38  typedef map<uint32_t, MinidumpThread*> IDToThreadMap;
39  typedef vector<MinidumpThread> MinidumpThreads;
40
41  static const uint32_t kStreamType = MD_THREAD_LIST_STREAM;
42
43  bool Read(uint32_t aExpectedSize) override;
44
45  // The largest number of threads that will be read from a minidump.  The
46  // default is 256.
47  static uint32_t max_threads_;
48
49  // Access to threads using the thread ID as the key.
50  IDToThreadMap    id_to_thread_map_;
51
52  // The list of threads.
53  MinidumpThreads* threads_;
54  uint32_t        thread_count_;
55
56  DISALLOW_COPY_AND_ASSIGN(MinidumpThreadList);
57};

线程详情

每个线程的信息保存在 MinidumpThread 对象中,线程的原始数据结构是 MDRawThread,从dmp文件读取的内容也是保存在 MDRawThread 中。MinidumpThread 的源码位置:src/google_breakpad/processor/minidump.h;MDRawThread 的定义位于 src/google_breakpad/common/minidump_format.h。

线程核心数据结构如下。读取dmp时,就是先找到该stream在文件中的偏移位置,再一次性读取 sizeof(MDRawThread) 个字节,直接填充到结构体中(反序列化)

 1typedef struct {
 2  // 线程id
 3  uint32_t             thread_id;
 4  // 线程挂起次数
 5  uint32_t             suspend_count;
 6  // 线程优先级类别(priority class)
 7  uint32_t             priority_class;
 8  // 线程优先级
 9  uint32_t             priority;
10  uint64_t             teb;             /* Thread environment block */
11  // 线程使用的栈在dmp文件中的偏移位置和大小
12  MDMemoryDescriptor   stack;
13  // 线程上下文信息在dmp文件中的偏移位置和大小
14  MDLocationDescriptor thread_context;  /* MDRawContext[CPU] */
15} MDRawThread;  /* MINIDUMP_THREAD */

线程类

 1/**
 2* 保存线程信息的核心对象,从dmp文件中读取的核心数据保存在MDRawThread类型的成员变量thread_中
 3*  1. Read方法是核心,后面专门介绍
 4*/
 5class MinidumpThread : public MinidumpObject {
 6 public:
 7  virtual ~MinidumpThread();
 8
 9  const MDRawThread* thread() const { return valid_ ? &thread_ : NULL; }
10  // GetMemory may return NULL even if the MinidumpThread is valid,
11  // if the thread memory cannot be read.
12  virtual MinidumpMemoryRegion* GetMemory();
13  // GetContext may return NULL even if the MinidumpThread is valid.
14  virtual MinidumpContext* GetContext();
15
16  // The thread ID is used to determine if a thread is the exception thread,
17  // so a special getter is provided to retrieve this data from the
18  // MDRawThread structure.  Returns false if the thread ID cannot be
19  // determined.
20  virtual bool GetThreadID(uint32_t *thread_id) const;
21
22  // Print a human-readable representation of the object to stdout.
23  void Print();
24
25  // Returns the start address of the thread stack memory region.  Returns 0 if
26  // MinidumpThread is invalid.  Note that this method can be called even when
27  // the thread memory cannot be read and GetMemory returns NULL.
28  virtual uint64_t GetStartOfStackMemoryRange() const;
29
30 protected:
31  explicit MinidumpThread(Minidump* minidump);
32
33 private:
34  // These objects are managed by MinidumpThreadList.
35  friend class MinidumpThreadList;
36
37  // This works like MinidumpStream::Read, but is driven by
38  // MinidumpThreadList.  No size checking is done, because
39  // MinidumpThreadList handles that directly.
40  bool Read();
41
42  MDRawThread           thread_;
43  MinidumpMemoryRegion* memory_;
44  MinidumpContext*      context_;
45};

线程读取方法

 1bool MinidumpThread::Read() {
 2  // Invalidate cached data.
 3  delete memory_;
 4  memory_ = NULL;
 5  delete context_;
 6  context_ = NULL;
 7
 8  valid_ = false;
 9
10  //  thread_是成员变量,类型为MDRawThread
11  // 读取sizeof(MDRawThread)大小的数据,前面专门介绍过,保存了线程id等重要信息
12  if (!minidump_->ReadBytes(&thread_, sizeof(thread_))) {
13    BPLOG(ERROR) << "MinidumpThread cannot read thread";
14    return false;
15  }
16
17  // 上一篇文章也介绍过,根据dmp文件的标志判断其字节序(大端/小端)是否与当前机器一致;不一致(swap()返回true)时,需要对每个多字节字段逐一做字节序翻转
18  if (minidump_->swap()) {
19    Swap(&thread_.thread_id);
20    Swap(&thread_.suspend_count);
21    Swap(&thread_.priority_class);
22    Swap(&thread_.priority);
23    Swap(&thread_.teb);
24    Swap(&thread_.stack);
25    Swap(&thread_.thread_context);
26  }
27
28  // 将线程调用栈在dmp中的偏移位置和大小保存在成员变量中
29  // Check for base + size overflow or undersize.
30  if (thread_.stack.memory.rva == 0 ||
31      thread_.stack.memory.data_size == 0 ||
32      thread_.stack.memory.data_size > numeric_limits<uint64_t>::max() -
33                                       thread_.stack.start_of_memory_range) {
34    // This is ok, but log an error anyway.
35    BPLOG(ERROR) << "MinidumpThread has a memory region problem, " <<
36                    HexString(thread_.stack.start_of_memory_range) << "+" <<
37                    HexString(thread_.stack.memory.data_size) <<
38                    ", RVA 0x" << HexString(thread_.stack.memory.rva);
39  } else {
40    memory_ = new MinidumpMemoryRegion(minidump_);
41    memory_->SetDescriptor(&thread_.stack);
42  }
43
44  valid_ = true;
45  return true;
46}