博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
20150501 调试分析之 修改内核来定位系统僵死问题
阅读量:4494 次
发布时间:2019-06-08

本文共 13169 字,大约阅读时间需要 43 分钟。

20150501 调试分析之 修改内核来定位系统僵死问题

2015-05-01 Lover雪儿

今天还是研究内核调试,

死机,这个词语,大家应该不陌生.

当我们写程序时,如果加入到内核中的程序里出现了死循环,内核运行到这段程序时就会直接进入类似死机的状态.

那么怎么可以解决这个问题呢?

我们都知道,我们人的心脏是一直跳动的,而恰恰如此,内核也有它的跳动,那就是tick中断,

所以我们可以从tick中断入手,解决上面的死机问题.

 

在开发板上运行cat /proc/interrupts 可以查看系统当前的各种中断号,

可以看到一个中断名为i.MX Timer Tick 的中断,那么它就是我们今天的主角.

1 root@EasyARM-iMX257 /mnt/nfs/module# cat /proc/interrupts  2          CPU0 3   9:          0           -  mxsdhci 4  14:          0           -  CSPI_IRQ 5  25:          2           -  imxdi     -  mxcsdma 6  35:          0           -  ehci_hcd:usb1 7  37           2453     -  mxcintuart 8  46:          3           -  m -  i.MX Timer Tick   9  57:          0           -  mxsdhci10 Err:           0

 

在内核中查找 Timer Tick的源代码,如下所示:

1 /* linux-2.6.31/arch/arm/plat-mxc/time.c 2  * IRQ handler for the timer 3  */ 4 static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id) 5 { 6     struct clock_event_device *evt = &clockevent_mxc; 7     uint32_t tstat; 8  9     if (timer_is_v2())10         tstat = __raw_readl(timer_base + MX3_TSTAT);11     else12         tstat = __raw_readl(timer_base + MX1_2_TSTAT);13 14     gpt_irq_acknowledge();15 16     evt->event_handler(evt);17 18     return IRQ_HANDLED;19 }20 21 static struct irqaction mxc_timer_irq = {22     .name        = "i.MX Timer Tick",23     .flags        = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,24     .handler    = mxc_timer_interrupt,25 };

 

在这个函数中,我们可以增加一些代码:有点类似看门狗

在mxc_timer_interrupt中增加打印语句

在mxc_timer_interrupt 中断函数中检测系统当前正在运行的进程,如果10s之内都是同一个进程在运行,我们就把这个进程打印出来(先从简单入手,此处先不做太多复杂的事情)

步骤:

首先备份 linux-2.6.31/arch/arm/plat-mxc/time.c,

接着修改time.c的内容,

最后编译内核,重新给板子启动新内核

 

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cp time.c time.c.bak

修改time.c,在中断函数中加入打印语句

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# vi time.c

************************************************************************************************

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cd ../../..

编译内核

root@Lover:/home/study/nfs_home/system/linux-2.6.31# make uImage

CHK include/linux/version.h

make[1]: 'include/asm-arm/mach-types.h' is up to date.

CHK include/linux/utsrelease.h

SYMLINK include/asm -> include/asm-arm

************************************************************************************************

Data Size: 2180620 Bytes = 2129.51 kB = 2.08 MB

Load Address: 80008000

Entry Point: 80008000

Image arch/arm/boot/uImage is ready

root@Lover:/home/study/nfs_home/system/linux-2.6.31# cp arch/arm/boot/uImage /tftpboot/uImage

root@Lover:/home/study/nfs_home/system/linux-2.6.31#

************************************************************************************************

在开发板上重新烧写内核

MX25 U-Boot > run upsystem

FEC: enable RMII gasket

ver 192.168.31.179; our IP address is 192.168.31.180

Filename '00

Loading: #################################################################

#################################################################

###################

done

************************************************************************************************

加载完毕后,如果不动开发板,会发现,每隔10s,就会有进程pid=0,名字name=swapper的打印消息.

root@EasyARM-iMX257 ~# mxc_timer_interrupt: pid = 0, name = swapper

root@EasyARM-iMX257 ~# mxc_timer_interrupt: pid = 0, name = swapper

root@EasyARM-iMX257 ~#

修改time.c如下所示:

1 /*   linux-2.6.31/arch/arm/plat-mxc/time.c 2  * IRQ handler for the timer 3  */ 4 static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id) 5 { 6     struct clock_event_device *evt = &clockevent_mxc; 7     uint32_t tstat; 8 //// 9     static pid_t pre_pid;10     static int  cnt = 0;11     if(pre_pid == current->pid){12         cnt++;13     }else{14         cnt = 0;15         pre_pid = current->pid;16     }17     if(cnt == 10*HZ){18         cnt = 0;19         printk("mxc_timer_interrupt: pid = %d, name = %s\n",current->pid, current->comm);20     }21 //22     if (timer_is_v2())23         tstat = __raw_readl(timer_base + MX3_TSTAT);24     else25         tstat = __raw_readl(timer_base + MX1_2_TSTAT);26 27     gpt_irq_acknowledge();28 29     evt->event_handler(evt);30 31     return IRQ_HANDLED;32 }

 

.修改错误代码,在代码中增加死循环

还是沿用我们前面的err_led.c的驱动程序.

参考博客地址:

open函数中,我们故意加入一个死循环.

/* err_led.c */
/*
 * open() handler of the demo driver.  It logs a message and then spins
 * forever on purpose, so that opening the device hangs the calling
 * process inside the kernel.
 */
static int key_open(struct inode *inode, struct file *file)
{
    printk("<0>function open!\n\n");

    /* Deliberate endless loop: this is the hang the article tracks down. */
    for (;;)
        ;

    return 0;   /* never reached */
}

 

编译后,接着在开发板中加载错误的驱动程序,使用cat 命令打开设备.

root@EasyARM-iMX257 ~# ifconfig eth0 192.168.31.181;mount -t nfs 192.168.31.179:

/home/study/nfs_home /mnt/nfs -o nolock;cd /mnt/nfs/module/

root@EasyARM-iMX257 /mnt/nfs/module#

root@EasyARM-iMX257 /mnt/nfs/module# cd 39_debug_with_timer/

root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# insmod err_led.ko

Hello,this is err_led_dev module!

addr base_iomux : c4a26000

addr base_gpio3 : c4a2a000

addr MUX_CTL : c4a26060

addr PAD_CTL : c4a26270

addr GDIR_GPIO3 : c4a2a004

addr DR_GPIO3 : c4a2a000

root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# cat /dev/err_led_dev

function open!

#################################################################

可以发现,打开设备后,进入open函数,系统直接进入死机状态,每隔10s便会打印出我们的进程号pid=1805

mxc_timer_interrupt: pid = 1805, name = cat

mxc_timer_interrupt: pid = 1805, name = cat

mxc_timer_interrupt: pid = 1805, name = cat

在asm_do_IRQ中增加打印语句,打印出死循环处的PC值

接着恢复上面的time.c的代码,然后在linux-2.6.31/arch/arm/kernel/irq.c文件中找到系统中断的总调用者asm_do_IRQ,

我们在asm_do_IRQ函数里加入前面time.c中的打印代码.

root@Lover:/home/study/nfs_home/system/linux-2.6.31# cd arch/arm/plat-mxc/

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# mv time.c.bak time.c

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cd ..

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm# cd kernel/

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/kernel# vi irq.c

root@Lover:/home/study/nfs_home/system/linux-2.6.31/arch/arm/kernel# cd ../../../

root@Lover:/home/study/nfs_home/system/linux-2.6.31# make uImage

########################################################

Load Address: 80008000

Entry Point: 80008000

Image arch/arm/boot/uImage is ready

root@Lover:/home/study/nfs_home/system/linux-2.6.31# cp arch/arm/boot/uImage /tftpboot/uImage

root@Lover:/home/study/nfs_home/system/linux-2.6.31#

########################################################

从开发板重新烧写新内核

启动开发板

Irq.c修改内容如下:

1 /* linux-2.6.31/arch/arm/kernel/irq.c 2  * do_IRQ handles all hardware IRQ's.  Decoded IRQs should not 3  * come via this function.  Instead, they should provide their 4  * own 'handler' 5  */ 6 asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) 7 { 8     struct pt_regs *old_regs = set_irq_regs(regs); 9 ////10 //从  cat /proc/interrupts  中得到我们的tick中断为4611    if(irq == 46)12    {13     ////14         static pid_t pre_pid;15         static int  cnt = 0;16         if(pre_pid == current->pid){17             cnt++;18         }else{19             cnt = 0;20             pre_pid = current->pid;21         }22         if(cnt == 10*HZ){23             cnt = 0;24             printk("asm_do_IRQ => mxc_timer_interrupt: pid = %d, name = %s\n",current->pid, current->comm);25             printk("pc = %08x\n",regs->ARM_pc);//ptract.h26         }27     /////28     }29 ////30 31     irq_enter();32 33     /*34      * Some hardware gives randomly wrong interrupts.  Rather35      * than crashing, do something sensible.36      */37     if (unlikely(irq >= NR_IRQS)) {38         if (printk_ratelimit())39             printk(KERN_WARNING "Bad IRQ%u\n", irq);40         ack_bad_irq(irq);41     } else {42         generic_handle_irq(irq);43     }44 45     /* AT91 specific workaround */46     irq_finish(irq);47 48     irq_exit();49     set_irq_regs(old_regs);50 }

 

启动开发板,加载错误的驱动程序,根据打印出来的PC值来反推错误地址:

root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# insmod err_led.ko

root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# cat /dev/err_led_dev

function open!

根据打印出来的pc,cat /proc/kallsyms,找到错误的驱动err_led.ko,对其进行反汇编,然后找到错误的函数,进而反推出c语言代码出错位置.

92 00000130 <key_open>:

93 130: e52de004 str lr, [sp, #-4]!

94 134: e59f0008 ldr r0, [pc, #8] ; 144 <.text+0x144>

95 138: e24dd004 sub sp, sp, #4 ; 0x4

96 13c: ebfffffe bl 0 <printk>

97 140: eafffffe b 140 <key_open+0x10> //很容易就找到了错误地址,此处一直b 140就为死循环

98 144: 000000cc andeq r0, r0, ip, asr #1

99

步骤和前面的博客文章

<>一样了,

此处不再赘述,博客地址:

如果要调试应用程序,可以使用strace,具体的用法,百度上有很详细的解释

 

附上驱动程序err_led.c

1 #include
2 #include
3 #include
4 #include
5 #include
6 #include
7 #include
8 #include
9 #include
10 #include
11 #include
12 #include
13 #include
14 15 #define Driver_NAME "err_led_dev" 16 #define DEVICE_NAME "err_led_dev" 17 18 static int major = 0; 19 20 #define LED_ON 0 21 #define LED_OFF 1 22 23 24 //auto to create device node 25 static struct class *drv_class = NULL; 26 static struct class_device *drv_class_dev = NULL; 27 28 //寄存器基址; 29 static unsigned long mem_iomux; 30 static unsigned long mem_gpio3; 31 static unsigned long base_iomux; //iomux基址 0X 43FA C000 - 0X 43FA FFFF 32 static unsigned long base_gpio3; //gpio3 0X 53FA 4000 - 0X 53FA 7FFF 33 // MUX_CTL模式选择 配置寄存器 34 #define MUX_CTL (*(volatile unsigned long *)(base_iomux + 0x0060)) 35 // PAD_CTL GPIO常用功能设置 36 #define PAD_CTL (*(volatile unsigned long *)(base_iomux + 0x0270)) 37 // GPIO DR 数据寄存器 DR 38 #define DR_GPIO3 (*(volatile unsigned long *)(base_gpio3 + 0x0000)) 39 // GPIO GDIR 方向控制寄存器 GDIR 40 #define GDIR_GPIO3 (*(volatile unsigned long *)(base_gpio3 + 0x0004)) 41 42 43 static int key_open(struct inode *inode, struct file *file) 44 { 45 printk("<0>function open!\n\n"); 46 //// 47 //在此加入一个死循环 48 while(1); 49 //// 50 return 0; 51 } 52 53 static int key_read(struct file *filp, char __user *buff, size_t count, loff_t *offp) 54 { 55 return 0; 56 } 57 58 static ssize_t key_write(struct file *file, const char __user *buf, size_t count, loff_t * ppos) 59 { 60 printk("<0>function write!\n\n"); 61 return 1; 62 } 63 64 static int key_release(struct inode *inode, struct file *filp) 65 { 66 printk("<0>function write!\n\n"); 67 return 0; 68 } 69 70 static int key_ioctl(struct inode *inode,struct file *flip,unsigned int command,unsigned long arg) 71 { 72 printk("<0>function ioctl!\n\n"); 73 74 switch(command) 75 { 76 case LED_ON: 77 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 亮 78 break; 79 case LED_OFF: 80 81 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 灭 82 break; 83 default: 84 break; 85 } 86 87 return 0; 88 } 89 static struct file_operations key_fops = { 90 .owner = THIS_MODULE, /* 这是一个宏,推向编译模块时自动创建的__this_module变量 */ 91 .open = key_open, 92 .read = key_read, 
93 .write = key_write, 94 .release= key_release, 95 .ioctl = key_ioctl, 96 }; 97 98 void gpio_addr(void){ 99 printk("<0>addr base_iomux : %x \n",base_iomux);100 printk("<0>addr base_gpio3 : %x \n",base_gpio3);101 printk("<0>addr MUX_CTL : %x \n",&MUX_CTL);102 printk("<0>addr PAD_CTL : %x \n",&PAD_CTL);103 printk("<0>addr GDIR_GPIO3 : %x \n",&GDIR_GPIO3);104 printk("<0>addr DR_GPIO3 : %x \n",&DR_GPIO3);105 }106 107 108 109 void led_on_off(void){110 ssleep(1);111 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1112 ssleep(1);113 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零114 ssleep(1);115 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1116 ssleep(1);117 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零118 ssleep(1);119 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1120 ssleep(1);121 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零122 ssleep(1);123 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1124 ssleep(1);125 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零126 ssleep(1);127 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1128 }129 130 static int __init key_irq_init(void)131 {132 printk("<0>\nHello,this is %s module!\n\n",Driver_NAME);133 //register and mknod134 major = register_chrdev(0,Driver_NAME,&key_fops);135 drv_class = class_create(THIS_MODULE,Driver_NAME);136 drv_class_dev = device_create(drv_class,NULL,MKDEV(major,0),NULL,DEVICE_NAME); /*/dev/key_query*/137 138 //IO端口申请 ioremap 可以直接通过指针来访问这些地址139 base_iomux = ioremap(0x43FAC000,0xFFF);140 base_gpio3 = ioremap(0x53FA4000,0xFFF);141 142 //MUX_CTL143 MUX_CTL &= ~(0x07 << 0); 144 MUX_CTL |= (0X05 << 0); //设置为ALT5 GPIO3_23 ERR_LED145 //PAD_CTL146 PAD_CTL &= ~(0x01<<13 | 0x01<<3 | 0x03<<1 | 0x01<<0); //1.8v 不需要上拉下拉 CMOS输出 slew rate147 //GDIR_GPIO3 配置为输出模式148 GDIR_GPIO3 &= ~(0x01 << 23); 149 GDIR_GPIO3 |= (0x01 << 23); //配置为输出模式 150 151 //DR_GPIO3 配置为输出0 点亮ERR_LED152 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零153 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零154 gpio_addr();155 led_on_off();156 return 0; 157 }158 159 static void __exit key_irq_exit(void)160 {161 gpio_addr();162 
printk("<0>\nGoodbye,%s!\n\n",Driver_NAME);163 led_on_off();164 165 unregister_chrdev(major,Driver_NAME);166 device_unregister(drv_class_dev);167 class_destroy(drv_class);168 169 //释放IO端口170 iounmap(base_iomux);171 iounmap(base_gpio3);172 }173 174 175 /* 这两行指定驱动程序的初始化函数和卸载函数 */176 module_init(key_irq_init);177 module_exit(key_irq_exit);178 179 /* 描述驱动程序的一些信息,不是必须的 */180 MODULE_AUTHOR("Lover雪儿");181 MODULE_VERSION("0.1.0");182 MODULE_DESCRIPTION("IMX257 key Driver");183 MODULE_LICENSE("GPL");
err_led.ko

 

转载于:https://www.cnblogs.com/lihaiyan/p/4470583.html

你可能感兴趣的文章
SVN里的一些细小概念
查看>>
iOS9 HTTP请求失败
查看>>
一个开发环境遇到的问题
查看>>
Meet in the middle学习笔记
查看>>
autocad.net 利用linq获取矩形框内的块参照
查看>>
过滤动态块
查看>>
FastJSON学习
查看>>
【JavaWeb】DbUtils入门之QueryRunner
查看>>
dblink的使用
查看>>
实验报告
查看>>
linux后台运行
查看>>
(转)浅谈分布式
查看>>
Chrome扩展移植到Edge浏览器教程
查看>>
mysql分表的3种方法(转)
查看>>
eclipse格式化代码样式
查看>>
asp uploadify示例下载
查看>>
1/7 第一篇 变量的内存实质
查看>>
jQuery遮罩插件jQuery.blockUI.js简介
查看>>
MaskedTextBox控件实现输入验证
查看>>
设计模式-行为型模式-中介者模式
查看>>