Draft for the final version

2021-08-15 20:34:43 +02:00 · 2021-08-15 20:34:43 +02:00 · 5fc9fc40c1
parent f5877138e2
commit 5fc9fc40c1
6 changed files with 1585 additions and 124 deletions
--- a/bibliography.bib
+++ b/bibliography.bib
@ -61,6 +61,14 @@ series = {ASE 2016}
  urldate = {2021-05-20}
 }
% SecurityWeek article, cited through its Wayback Machine snapshot.
% Braces keep the acronym and the proper noun intact under sentence-casing styles.
@online{miraisec,
  author  = {Kovacs, Eduard},
  title   = {Over 500,000 {IoT} Devices Vulnerable to {Mirai} Botnet},
  year    = {2016},
  url     = {https://web.archive.org/web/20210507170030/https://www.securityweek.com/over-500000-iot-devices-vulnerable-mirai-botnet},
  urldate = {2021-05-07}
 }
@online{xss,
  author = {The OWASP Foundation},
  title = {Cross Site Scripting (XSS) Software Attack | OWASP},
@ -542,3 +550,71 @@ howpublished = {\url{https://github.com/jtpereyda/boofuzz}},
  year={2012},
  publisher={Citeseer}
 }
% USENIX Security 2017 paper on the Mirai botnet; one author per line for readable diffs.
@inproceedings{mirai,
 author    = {Manos Antonakakis and
              Tim April and
              Michael Bailey and
              Matt Bernhard and
              Elie Bursztein and
              Jaime Cochran and
              Zakir Durumeric and
              J. Alex Halderman and
              Luca Invernizzi and
              Michalis Kallitsis and
              Deepak Kumar and
              Chaz Lever and
              Zane Ma and
              Joshua Mason and
              Damian Menscher and
              Chad Seaman and
              Nick Sullivan and
              Kurt Thomas and
              Yi Zhou},
 title     = {Understanding the Mirai Botnet},
 booktitle = {26th {USENIX} Security Symposium ({USENIX} Security 17)},
 publisher = {{USENIX} Association},
 address   = {Vancouver, BC},
 month     = aug,
 year      = {2017},
 pages     = {1093--1110},
 isbn      = {978-1-931971-40-9},
 url       = {https://www.usenix.org/conference/usenixsecurity17/technical-sessions/presentation/antonakakis}
 }
% arXiv preprint, entry exported from DBLP (see biburl/bibsource).
% The acronym in the title is brace-protected so sentence-casing styles keep it.
@article{crystal,
  author        = {Nan Zhang and
                   Soteris Demetriou and
                   Xianghang Mi and
                   Wenrui Diao and
                   Kan Yuan and
                   Peiyuan Zong and
                   Feng Qian and
                   XiaoFeng Wang and
                   Kai Chen and
                   Yuan Tian and
                   Carl A. Gunter and
                   Kehuan Zhang and
                   Patrick Tague and
                   Yue-Hsun Lin},
  title         = {Understanding {IoT} Security Through the Data Crystal Ball: Where We Are Now and Where We Are Going to Be},
  journal       = {CoRR},
  volume        = {abs/1703.09809},
  year          = {2017},
  url           = {http://arxiv.org/abs/1703.09809},
  archivePrefix = {arXiv},
  eprint        = {1703.09809},
  timestamp     = {Sat, 23 Jan 2021 01:11:26 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/ZhangDMDYZQW0TG17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
 }
% Fuzzing survey in IEEE Transactions on Software Engineering.
% The publisher export carried empty volume/number fields and the early-access
% placeholder pagination "1-1"; these are dropped instead of being printed.
% NOTE(review): add the final volume, issue and page range once confirmed against the DOI record.
@article{fuzzsurvey,
 author  = {Manès, Valentin Jean Marie and Han, HyungSeok and Han, Choongwoo and Cha, Sang Kil and Egele, Manuel and Schwartz, Edward J. and Woo, Maverick},
 title   = {The Art, Science, and Engineering of Fuzzing: A Survey},
 journal = {IEEE Transactions on Software Engineering},
 year    = {2019},
 doi     = {10.1109/TSE.2019.2946563}
 }
% DSTO technical report on fuzzing.
% The exported institution string was all caps with the location embedded in it;
% it is split into institution and address so the style controls the formatting.
@techreport{fuzzart,
  author      = {McNally, Richard and Yiu, Ken and Grove, Duncan and Gerhardy, Damien},
  title       = {Fuzzing: The State of the Art},
  institution = {Defence Science and Technology Organisation},
  address     = {Edinburgh, Australia},
  year        = {2012}
 }
% NDSS 2016 paper on automated dynamic analysis of Linux-based firmware.
% The export's placeholder values (volume 1, pages 1--1) and the trailing period
% in the title are removed; the style supplies its own punctuation.
@inproceedings{towardsautodyn,
  author    = {Chen, Daming D. and Woo, Maverick and Brumley, David and Egele, Manuel},
  title     = {Towards Automated Dynamic Analysis for {Linux}-based Embedded Firmware},
  booktitle = {Network and Distributed System Security Symposium ({NDSS})},
  year      = {2016}
 }
--- a/feedback.pdf
+++ b/feedback.pdf
--- a/images/fuzzing2.pdf
+++ b/images/fuzzing2.pdf
--- a/paper.pdf
+++ b/paper.pdf
--- a/paper.tex
+++ b/paper.tex
@ -7,6 +7,7 @@
 %% SELECT LANGUAGE
 \usepackage[english]{babel}
 % \usepackage[ngerman]{babel}
 \usepackage{hyperref}
 \usepackage[backend=biber,doi=true,url=true,block=ragged,maxnames=6]{biblatex}
 \renewcommand*{\bibfont}{\footnotesize}
@ -32,55 +33,59 @@
 \usepackage{tabularx}
 % \usepackage[parfill]{parskip}
 \addbibresource{bibliography.bib}
 \usepackage{blindtext}
 \newcommand\blindsection{{\color{gray}\subsection{Some bindtext}\blindtext}}
 \begin{document}
 \title{Overview of IoT Fuzzing Techniques}
 \author{\IEEEauthorblockN{Tuan-Dat
-    Tran\\\small(3030462)}\\\IEEEauthorblockA{University of
+    Tran\\\small}\\\IEEEauthorblockA{University of
    Duisburg-Essen\\tuan-dat.tran@stud.uni-due.de}}
 \maketitle
 \begin{abstract}
-  Due to the rising popularity of IoT devices and embedded systems and their usage in not only in the business sector but also at home, the focus has been shifting on the security of those devices. To address this issue, there have been many approaches in detecting, analyzing and mitigating security flaws in IoT devices. One of the ideas to detect vulnerabilities in an automated manner is IoT Fuzzing. Contrary to regular fuzzing it comes with its own constraints and techniques to optimize performance and coverage of attack surfaces.
+  Due to the rising popularity of IoT devices and embedded systems and their usage not only in the business sector but also at home, the focus has been shifting to the security of those devices. To address this issue, there have been many approaches to detecting, analysing and mitigating security flaws in IoT devices, like static\cite{largeanal} and dynamic analysis\cite{autodyn}. Another approach to vulnerability detection is fuzzing.
-  In this paper we are comparing techniques used by IoT fuzzers to circumvent the adversities presented by IoT devices like app-based approaches by IoTFuzzer and Snipuzz or emulation approaches used by Firm-Afl.
+  Fuzzing is a technique originally used for automated black box testing of software and has become a highly researched topic\cite{iotfuzzer}\cite{siotfuzzer}\cite{wmifuzzer}\cite{avatar}, expanding its usage from black box testing to white and grey box testing. Fuzzers generate test cases to test software for vulnerabilities. The generation of those test cases is done in many ways.
  IoT fuzzers focus on fuzzing IoT devices. Although there are similarities to regular fuzzing, fuzzing IoT devices comes with its own constraints and techniques. 
  In this paper, we are comparing techniques used by IoT fuzzers to circumvent the challenges presented by IoT devices and the constraints of the solutions proposed by the IoT fuzzers. 
  Due to the wide range of different IoT fuzzing tools we are dividing the comparison of the techniques based on the type of IoT fuzzing tool. We also outline the evolution of IoT fuzzing techniques to visualize the progress made in the field. This overview can then be used to choose the optimal usage of a specific IoT fuzzing device in a given use case or combine different techniques used in different fuzzing tools to create a novel approach and find new security flaws through a combined usage of IoT fuzzing techniques.
 \end{abstract}
 \section{Introduction}
 % \label{sec:intro}
-Internet of Things (IoT) devices and embedded systems are becoming more and more prevalent, and with billions of devices being connected to the internet they are an integral part of everyday life\cite{hung2017leading}. Despite IoT devices being so widespread they are riddled with security vulnerabilities, which makes them an easy target for attackers, since many of those vulnerabilities are considered ``low-hanging fruits''. This led to over 70 unique attack incidents\cite{mcmillen2015security} between 2010 and 2016 while the number of IoT devices and embedded systems in use is steadily rising and with it the amount of vulnerabilities in the wild.
+Internet of Things (IoT) devices and embedded systems are becoming more and more prevalent, and with billions of devices being connected to the internet, they are an integral part of everyday life\cite{hung2017leading}. Despite IoT devices being so widespread, they are riddled with security vulnerabilities, which makes them an easy target for attackers, since many of those vulnerabilities are considered ``low-hanging fruits''\cite{iotfuzzer}. One example of such a vulnerability in IoT devices is the 2016 Mirai botnet which consisted of an average of 200,000 to 300,000 IoT devices\cite{mirai} while it was suspected that over half a million are vulnerable to the security vulnerabilities the Mirai Botnet utilized\cite{miraisec}.
-While implementation flaws and app over-privilege are just some of the many security problems an IoT device can have, detection and mitigation of these security flaws has proven itself to be challenging\cite{crash}. One approach to discover those flaws is called fuzz-testing, or fuzzing. Mitigation of found security flaws can often be hard due to the nature of embedded devices being heavily customized and often not adhering to one specific standard. Therefore, the fixing of security flaws is often left to the manufacturer of the device, since they possess the necessary tool chains, source code and pipelines to provide security patches to their devices.
+While information leakage\cite{crystal} and insecure login credentials\cite{mirai} are just some of the many security problems an IoT device can have, detection and mitigation of these security flaws has proven itself to be challenging. One approach to discover those flaws is called fuzz-testing, or fuzzing.
-Fuzzing is a method to test software for flaws by automatically generating and sending malformed data to the software. There are many ways to generate and send data to the software. An example for a specific type of input generation is mutation based fuzzing, which is utilized by IoTFuzzer\cite{iotfuzzer}\cite{OWASP2021Fuzzing}. Mutation based fuzzing takes a valid input and changes specific parts of it to trigger an unexpected state in the software and therefore crash it. Crashing or bringing the software into an unexpected state is the general goal of fuzzing, since behavior like this indicates the presence of a bug.
+Fuzzing is a method to test software for flaws by automatically generating and sending large amounts of malformed data to the software. This is done while the fuzzer monitors the software's reaction to this data for malfunctions like crashes or other unexpected behaviour. The goal of fuzzers is to detect vulnerabilities in the software in an automated manner. Despite the simplistic approach to vulnerability detection, it has proven itself to be effective\cite{fuzzsurvey}. The simplistic approach enabled researchers to extend the capabilities of fuzzing from creating test cases consisting of random data to sophisticated systems which use a wide variety of information about the context and source code of the software. The complexity of fuzzing tools goes so far that the techniques used by fuzzers diverge to such an extent that they can be divided into different classes, each with their own strengths and weaknesses\cite{fuzzart}.
-Due to fuzzing being an automated process, fuzzing became a common tool for software testing in software development. Conventional fuzzing of software can be easily done concurrently, since software can, in most cases, be easily executed concurrently\cite{crash}. This increases the throughput of the fuzzer and thus the amount of test cases the software is tested against. This is one of the issues, which IoT fuzzers have to deal with, since the fuzzing IoT devices usually includes fuzzing the physical device itself if there is no emulation solution available. While emulation increases scalability, it also enables another range of issues and complexity to the fuzzing process e.g.\ the acquisition of the firmware to emulate. The process of firmware acquisition is different for every device, since it is dependent on the willingness of the manufacturer to publicly release the firmware. If the manufacturer does not release the firmware for his device, the firmware needs to be extracted directly from the device, which can vary in difficulty depending on the device\cite{crash}.
+A huge advantage of fuzzing compared to other vulnerability detection approaches is the automation of the analysis, which makes fuzzing a highly scalable method to find vulnerabilities\cite{crash}. Scaling up the fuzzing process enables a higher rate of test case generation and therefore increases the coverage of the target program. On the other hand, insufficiently ``smart'' test generation methods can lead the fuzzer to run for a long time without finding any new code paths or otherwise advancing the fuzzing process\cite{vuzzer}.
-Alternatively to fuzzing there are other ways to test software for vulnerabilities like static and dynamic firmware analysis.  Static firmware analysis is the analysis of firmware without executing it by using tools like binwalk\cite{binwalk} to unpack the firmware and reverse engineer it with a reverse engineering tool like IDA\cite{IDA}\cite{firmup}. For dynamic analysis the firmware is executed to be investigated. This can be done in a multitude of ways, for example running the firmware on the original device or emulating the device to have the firmware run in the emulated environment. The running firmware's behavior is then analyzed\cite{autodyn}. The advantage of static analysis is the possibility to automate and scale the processes of analyzing the firmware\cite{largeanal}, since the testing does not depend on a physical device. On the other hand, static analysis also yields a high amount of false positives and may not find completely new vulnerabilities with the usage of its heuristics\cite{firmafl}. Another challenge during static analysis is the handling of packed or obfuscated code. This can be overcome with dynamic analysis\cite{avatar}, by emulating the physical device, which increases scalability and eliminates the need to acquire the physical device to test it\cite{autodyn}.
+IoT fuzzing applies the methodologies of fuzzing to IoT devices. Just like the different kinds of fuzzers, IoT fuzzing has its own advantages and disadvantages.
 %
 IoT devices offer a large surface area regarding communication, e.g.\ network protocols, their companion app or their web interface\cite{iotfuzzer}\cite{boofuzz}\cite{wmifuzzer}. For this reason, fuzzers which were not originally designed to fuzz IoT devices can still be utilized for IoT fuzzing, like in the case of boofuzz, which was developed with the intent to fuzz network protocols\cite{boofuzz}. IoT Fuzzing also opens the door for new techniques, unique to IoT devices, by fuzzing the companion app of the device\cite{iotfuzzer}.
-IoT devices offer a large surface area regarding communication e.g.\ network protocols, their companion app or their web interface\cite{iotfuzzer}\cite{boofuzz}\cite{wmifuzzer}. For this reason fuzzers which were not originally designed to fuzz IoT devices can still be utilized for IoT fuzzing, like in the case of boofuzz, which was developed with the intent to fuzz network protocols\cite{boofuzz}. IoT fuzzers can also make use of techniques used by dynamic analysis since both approaches require execution of the firmware. This makes emulation a feasible way of testing IoT devices to increase scalability\cite{firmcorn}. In this work we will focus mainly on fuzzers, which were primarily developed for IoT fuzzing.
+In this paper, we present an overview of different fuzzing tools and techniques for IoT devices. We focus on the advantages and disadvantages of those techniques in the context of their use case to help developers and researchers find the right tool for their job and weigh the positive and negative aspects of existing approaches. The IoT fuzzing tools in this overview were chosen to cover as many recently developed fuzzing techniques as possible.
-Even though IoT fuzzers are used for finding security vulnerabilities in devices, and fixing those errors or learning from them and mitigating them is the next logical step, we will not discuss mitigation techniques in this paper since this is outside of our scope. We will also not dive deep into the implementations of specific techniques. 
+The paper is structured as follows. First, in Section II, we introduce IoT devices, firmware and general fuzzing. In Section III, we lead into IoT fuzzing and its challenges to create a knowledge basis for the IoT fuzzing techniques introduced in Section IV, the main section. Section V contains related work that is closely tied to IoT fuzzing. Finally, Section VI concludes the paper.
 By creating an overview of different IoT fuzzing techniques, we hope to achieve a comprehensive list of IoT fuzzing tools and their properties to help developers and researchers find the right tool for their job and weigh the positive and negative aspects of existing approaches.
 \section{Background}
 \subsection{IoT devices and embedded systems}
-The terms IoT devices and embedded systems describe a great amount of devices. Embedded systems are devices which interact with their surroundings via sensors and regulators and are built to serve a specific purpose\cite{crash}. IoT devices on the other hand are broadly described as devices which extend regular devices with an internet connection and enable them to communicate over it\cite{posey2021iot}. The term embedded devices can describe many devices such as cameras or industrial control systems (ICS), which makes it hard to generalize embedded devices. This also applies to IoT devices, which includes the definition of internet capable embedded systems. Ongoing, when we describe IoT devices, the description also fits embedded systems if not explicitly mentioned otherwise.
+The terms IoT devices and embedded systems describe a large number of devices. Embedded systems are devices which interact with their surroundings via sensors and regulators and are built to serve a specific purpose\cite{crash}. IoT devices on the other hand are broadly described as devices which extend regular devices with an internet connection to enable them to communicate over the internet\cite{posey2021iot}. The term embedded devices can describe many devices such as cameras or industrial control systems (ICS), which makes it hard to generalize embedded devices. This also applies to IoT devices, since the extension of an embedded system by an internet connection makes it an IoT device.
-The wide applicability of IoT devices in the context of business, manufacturing and home-use increases the surface area for vulnerabilities to be found. IoT devices, being so diverse regarding their functionalities and ways to offer their services, further increases the possible ways to accumulate vulnerabilities.%\textbf{citation neeeded}.
+%The wide applicability of IoT devices in the context of business, manufacturing and home-use increases the surface area for vulnerabilities to be found. IoT devices are diverse regarding their functionalities and ways to offer their services, further increases the possible ways to accumulate vulnerabilities\cite{crystal}. This makes them a 
-IoT devices, due to being built for specific purposes, don't need as much processing power as a general computer does. This leads to them having a hardware platform specifically tailored to their use case. And due to the heterogenic nature of IoT devices in terms of e.g. OS, instruction sets or memory layouts, analysis of the firmware proves difficult\cite{largeanal}. Reasons for this are the different requirements a manufacturer has for the device like the energy efficiency, real-time capability or memory footprint\cite{lowendos}.
+IoT devices, due to being built for specific purposes, do not need as much processing power as a general-purpose computer does. This leads to them having a hardware platform specifically tailored to their use case. Due to the heterogeneous nature of IoT devices in terms of e.g.\ operating systems, instruction sets or memory layouts, analysis of the firmware proves difficult\cite{largeanal}. Reasons for this are the different requirements a manufacturer has for the device, like energy efficiency, real-time processing or memory footprint\cite{lowendos}.
 \begin{figure}
  \begin{center}
@ -89,151 +94,260 @@ IoT devices, due to being built for specific purposes, don't need as much proces
  \caption{Example of IoT home network (inspired by Wang et al.\cite{wmifuzzer}).}\label{fig:iotnetwork}
 \end{figure}
-As mentioned earlier, IoT devices, and especially home-based ones, use multiple ways to connect to the internet. IoT devices connect to the internet either directly through WiFi or via an intermediary device like a smartphone and connecting to it with Bluetooth\cite{wmifuzzer}. Another way is having an IoT hub which acts as proxy between other IoT devices and either another intermediary via Bluetooth or directly WiFi. This leads to many ways an IoT network can be structured depending on the kind and number of IoT devices (\Cref{fig:iotnetwork}). 
+IoT devices, and especially home-based ones, use multiple ways to connect to the internet. IoT devices connect to the internet either directly through Wi-Fi or via an intermediary device like a smartphone and connecting to it with Bluetooth. Another way is having an IoT hub which acts as proxy between other IoT devices and either another intermediary via Bluetooth or directly Wi-Fi\cite{wmifuzzer}. This leads to many ways an IoT network can be structured, depending on the kind and number of IoT devices (\Cref{fig:iotnetwork}). 
-IoT firmware is the bridge between the hardware of the device and the software running on it. Sometimes IoT firmware can be acquired through the vendors website. Alternative methods for acquiring the firmware are extraction from the physical device, even though this way can be challenging due to debugging ports (e.g. JTAG interface) to dump the firmware from the device may not be available\cite{iotfuzzer}\cite{mulbin}. Firmware running on an IoT device expects the presence of certain hardware at boot- and/or runtime. Therefore missing hardware may cause the device to get stuck in a busy loop trying to find the hardware\cite{firmfuzz}. 
+The works of Hahm et al.\cite{lowendos} propose a classification into low-end and high-end IoT devices and dividing those two classifications into three subcategories for low-end devices. Those classes represent the complexity and computing capability of those devices, with ``Class 0'' having the least resources and ``Class 2'' devices having the most resources.
-% 
+Multi-purpose systems (i.e.\ smartphones and computers) deploy many mechanisms to detect faults like segmentation faults and report them through core dumps. IoT devices may not have such functionalities. The more minimalistic design of IoT devices causes them to only perform the specific tasks they were built for. Therefore, functionalities like heap hardening may not be present due to the IoT device's limited computing power and constrained costs\cite{crash}.
 Additionally, firmware is often packed or even encrypted, which poses an obstacle for firmware analysis. In some cases, proprietary compression algorithms or encryption without knowledge of the secret key makes firmware analysis infeasible or even impossible.
-The works of Hahm et al.\cite{lowendos} propose a classification into low-end and high-end IoT devices and dividing those two classifications into three subcategories for low-end devices. Those classes represent the complexity and computing capability of those devices with ``Class 0'' having the least resources and ``Class 2'' devices having the most resources.
+\subsection{Firmware}
-% 
+Firmware on IoT devices is the software that acts as an intermediary between higher-level software and the hardware of the device. This functionality is provided by the firmware's simplified interface to lower-level functionalities, which can be used by higher-level software\cite{firmcorn} to communicate with the hardware. Since firmware communicates with many parts of the IoT device, it contains a lot of information about it.
 In the works of Muench et al.\cite{crash} a similar classification is used. They are classified in ``Type-0'' to ``Type-III'' systems. T0 (Type-0) systems represent multi-purpose systems, which don't fall under the classification of embedded systems or IoT devices. T1 (Type-1) devices are devices, which use a general purpose operating system like Linux. The OS (operating system) is often modified to be more lightweight and offer a lightweight user environment like busybox. T2 (Type-2) devices run on customized operating systems which are tailored to the devices use case. In order to save space and computational power, typical OS functions like a Memory Management Unit may be omitted. T3 (Type-3) devices run on a single control loop. On these devices the firmware and the software, which runs the devices functionalities, are a single instance. This leads to a so-called ``blob firmware''\cite{karonte}, consisting of the application and system code compiled together. Muench et al.\cite{crash} add that the classification of the device merely indicates the kind of available security mechanisms while the usage of them varies from device to device.
-Multi-purpose systems (i.e.\ smartphones and computers) deploy many mechanisms to detect faults like segmentation faults and report them through core dumps. IoT devices may not have such functionalities. The more minimalistic design of IoT fuzzers causes them to only perform the tasks they were built for. Therefore functionalities like heap hardening may not be present due to the IoT devices limited computing power and constrained costs\cite{crash}. 
+There are several types of firmware based on the type of device they are used in. In the works of Muench et al.\cite{crash} devices are classified in ``Type-0'' to ``Type-III'' systems:
 T0 (Type-0) systems represent multi-purpose systems, which don't fall under the classification of embedded systems or IoT devices. 
 T1 (Type-1) devices are devices, which use a general purpose operating system, like Linux. The operating system on T1 devices is often modified to be more minimalistic and offer a lightweight user environment like busybox.
 T2 (Type-2) devices run on completely customized operating systems which are tailored to the device's use case. In order to save space and computational power, typical operating system functions like a Memory Management Unit may be omitted.
 T3 (Type-3) devices run on a single control loop. On these devices, the firmware and the software, which runs the device's functionalities, are a single instance. This leads to a so-called ``blob firmware''\cite{karonte}, consisting of the application and system code compiled together.
 Like all software, firmware is susceptible to bugs and misconfigurations, which can lead to vulnerabilities\cite{largeanal}. For this reason, analysis tools are needed to find such vulnerabilities. There are several methods to analyse firmware for bugs, but they all have to face the challenge of working around the heterogeneity of firmware\cite{largeanal}.
 To analyse firmware, firmware first has to be acquired. This can be done by downloading it from the vendor's website. An alternative method of acquiring firmware is extracting it from the physical device. This is done by either some kind of debugging port or by reading the flash memory directly. Extracting firmware manually poses a challenge in itself, since debugging ports are not always available on the end product\cite{iotfuzzer}\cite{mulbin}.
 Additionally, firmware is often packed or even encrypted, which poses yet another obstacle for firmware analysis. In some cases, proprietary compression algorithms or encryption make firmware analysis infeasible or even impossible.
 \subsection{Fuzzing}
-Fuzzing describes the process of testing a software for faulty and unexpected behavior by sending malformed messages as input for the software\cite{OWASP2021Fuzzing}. The basic fuzzing process can be divided into three steps: (1) input generation (2) sending messages as input to software and (3) monitor software behavior in reaction to the given input (\Cref{fig:fuzzing}). Due to the need to have the tested software running, fuzzing is considered a dynamic technique.
+Fuzzing describes the process of testing a software for faulty or unexpected behaviour by sending it malformed messages as input\cite{OWASP2021Fuzzing}. 
 \begin{figure}
  \begin{center}
    \includegraphics[width=\linewidth]{fuzzing}
  \end{center}
  \caption{Generalization of fuzzing process.}\label{fig:fuzzing}
 \end{figure}
 Advantages of fuzzing are the automation and scalability of the process. This enables fuzzing to run many test cases in a short amount of time which makes throughput of the fuzzer an important metric in evaluating fuzzers\cite{angora}. This is achieved by easily being able to run software concurrently on multiple processors. An alternative way is running the software in a virtual environment\cite{crash}. 
-There are multiple types of fuzzing techniques based on the amount of known information about the software: Whitebox, blackbox and greybox fuzzing. Whitebox fuzzing has complete information about the software's source code. Blackbox fuzzing on the other hand has no such information while greybox fuzzing lies in between regarding the available information. Blackbox fuzzing relies purely on the binary of a program or the program in its already executed state\cite{hfuzz}. This leads to blackbox fuzzers generally creating many unnecessary test cases due to the lack of knowledge about the target\cite{firmcorn}. Greybox fuzzers may use the additional information to improve the monitoring by injecting instrumentation to the binary at compile time\cite{angora}. Whitebox fuzzers can utilize the full source code to increase efficiency by using techniques like symbolic execution or dynamic taint analysis\cite{vuzzer}. Comparing blackbox fuzzers with greybox fuzzers or even whitebox fuzzers is therefore not worthwhile, due to the different starting conditions\cite{snipuzz}.
+There are multiple types of fuzzing techniques based on the amount of known information about the software: white box, black box and grey box fuzzing. White box fuzzing has complete information about the software's source code. Black box fuzzing on the other hand has no such information, while grey box fuzzing lies in between regarding the available information.
 % 
 Black box fuzzing relies purely on the binary of a program or the program in its already executed state\cite{hfuzz}. This leads to black box fuzzers generally creating many unnecessary test cases due to the lack of knowledge about the internals of the target\cite{firmcorn}. Another problem with black box fuzzers is the detection of errors. Internal system errors, which may lead to misbehaviour at a later time, cannot be easily detected by black box fuzzers as they occur. Black box fuzzers therefore often rely on externally visible exceptions. Advantages of black box fuzzing are the narrow and quick tests due to the limited surface area to target, focusing only on the aspects of the software the user interacts with\cite{compwbgbox}. Additionally, black box fuzzing may be the only way of fuzzing a target when there is no source code available.
 % 
 White box fuzzers on the other hand have access to the source code of the fuzzing target. Test cases generated by white box fuzzers are based on the analysis of the given source code. Techniques like symbolic execution or dynamic taint analysis are utilized to increase the efficiency of the fuzzer. In comparison to black box fuzzing, white box fuzzing usually has a higher overhead since the additional analysis is performed on the target's source code\cite{fuzzsurvey}.
 % 
 Grey box fuzzers take the middle ground between white and black box fuzzers and only use some information about the internals of the target software to improve the fuzzing process. This may be done by injecting instrumentation to the binary at compile time\cite{angora} or by performing lightweight static analysis on the source code of the software\cite{fuzzsurvey}. The usage of limited knowledge enables grey box fuzzers to have higher throughput than white box fuzzers, while being more accurate than black box fuzzers.
 %
 Comparing black box fuzzers with grey box fuzzers or even white box fuzzers is therefore not feasible, due to the different starting conditions and use cases\cite{snipuzz}.
-During the input generation step the fuzzer generates and prepares messages according to its generation strategy. Choosing which generation strategy is used depends on the given information or constraints of the system that is fuzzed. The fuzzer radamsa\cite{radamsa}, a general purpose blackbox fuzzer, for example creates messages derived from a possibly valid input and changes parts of it to generate new test cases. This classifies it as a mutation based fuzzer, since radamsa modifies existing input to create test cases. The operations on the given input can be substitution of characters, bit flips or other operations, based on the tools internal heuristics (\Cref{lst:radamsa}). There are lists, which contain strings that have a high probability to cause issues when used as input\cite{blons}\cite{fuzzdb}. These lists may be used by fuzzers as well to generate input but the generated input can also be random. The goal is to find an input which makes the software crash or display otherwise unexpected behavior.
+The basic fuzzing process can be divided into three steps: (1) input generation and sending that input to the software, (2) monitoring the software's behaviour in reaction to the given input and (3) adjusting the input according to the software's behaviour (\Cref{fig:fuzzing}).
-\begin{figure}[t]
+During the input generation step, the fuzzer generates and prepares messages according to its generation strategy. Choosing which generation strategy is used depends on the given information or constraints of the system that is being fuzzed. The given information about the fuzzing target differentiates fuzzers into the categories black-, white- and grey box fuzzers. 
  \begin{lstlisting}[language=sh,firstnumber=0,breaklines=true]
    > echo "rm -rf / --no-preserve-root" | radamsa -n 5
    rm -rf / --no-presef / --no-preserve-root
    rm -rf / --no-preserve-root
    rm -rf  --no-preserve-roo
    rm -rf!!;xcalc\0\u0000&#000;\n\340282366920938463463374607431768211457!xcalc$'%d\0$!!%d\x00 / --no-preserve-root
    rm -rf / --no-preserve-r'xcalc%#x'xcalcaaaa%d%n\0\x0aNaN%#x%p%d;xcalc+infoot
  \end{lstlisting}
  \caption{Example output of radamsa on ``rm -rf / --no-preserve-root'' (omitted non-printable characters)}\label{lst:radamsa}
 \end{figure}
-The message sending step depends on the target of the message. Software offers many ways to interact with it, from simple things like user input via text fields in desktop applications to packages sent by the users through web browsers to web servers. Those points of contact are possible targets for fuzzing. And dependent on the target, different techniques for message generation may be used. If a network protocol is fuzzed, like with the tools boofuzz\cite{boofuzz}, the fuzzer needs to have an understanding of the network protocol which is fuzzed. While other fuzzers like XSStrike\cite{xsstrike}, which was built to find XSS (cross site scripting) bugs, target web applications. While XSS bugs will not crash the software, they are a serious security threat, which enable an attacker to inject code to websites\cite{xss}.
+Monitoring the software's behaviour upon receiving a malformed message as input is another step of a typical fuzzing loop. The monitored behaviour depends on the earlier steps, but after every loop the original program's state should be restored to have an equal ground for all test cases. When the fuzzer looks for XSS bugs or SQL injections, the program will not crash when such a bug is triggered. This has to be taken into consideration while monitoring the software, and therefore other methods of detecting those bugs will have to be used than those used to detect crashes due to memory errors like buffer overflows. Fuzzers that do not try to trigger crashes usually use the application's answer to the input message to determine whether the test case triggered the event that was tested for\cite{boofuzz}. To monitor a software's crash, the fuzzer can provide instrumentation, with which the tested software is compiled\cite{afl}, if grey box fuzzing is used. A black box approach could be monitoring for specific output of the software after a malformed input has been sent, or monitoring the status of the network connection for networking capable software. A fuzzer's goal is to cover as many parts of the software as possible. Evaluating the coverage of the software is only possible for grey box or white box fuzzers, since they can instrument the code. This metric can be used to guide the fuzzing process of generating input, like in the state-of-the-art mutational fuzzer AFL\cite{afl} and its fork afl++\cite{aflpp}.
-Monitoring the softwares behavior upon receiving a malformed message as input is the last step of a typical fuzzing loop. The behavior monitored depends on the earlier steps, but after every loop the original programs state should be restored to have an equal ground for all test cases. When the fuzzer looks for XSS bugs or SQL injections, the program will not crash, when such a bug is triggered. This has to be taken into consideration while monitoring the software and therefore other methods of detecting those bugs will have to be used than methods, which are used to detect crashes due to memory errors like buffer overflows. Fuzzers who don't try to trigger crashes usually use the applications answer to the input message to determine whether the test case triggered the event, which was tested for\cite{boofuzz}. To monitor a softwares crash the fuzzer can provide instrumentation, with which the tested software is compiled\cite{afl}, if greybox fuzzing is used. A blackbox approach could be monitoring for specific output of the software after a malformed input has been sent or monitoring the status of the network connection for networking capable software. A fuzzers goal is to cover as many parts of the software as possible. Evaluating the coverage of the software is only possible for greybox or whitebox fuzzers, since they can instrument the code. This metric can be used to guide the fuzzing process of generating input like in the state-of-art mutational fuzzer AFL\cite{afl} and its fork afl++\cite{aflpp}.
+Another property of fuzzers is their adjustments to the input after a fuzzing loop is done. They are categorized into smart and ``dumb'' fuzzers. Dumb fuzzers are not aware of the input structure and therefore only try random input, substitutions based on heuristics, delete parts of the input or add parts to the input. This can lead to a lot of test cases that do not lead anywhere. Another disadvantage is that input generated by a dumb fuzzer may easily be dismissed if a specific input structure is expected. Smart fuzzers, in contrast, try to generate valid input based on the software's protocol\cite{boofuzz}, grammar\cite{grammarinator} or model\cite{modelfuzz}. %\textbf{describe grammar, protocol, model}
 Another property of fuzzers is their adjustments to the input after a fuzzing loop is done. They are categorized into smart and ``dumb'' fuzzers. Dumb fuzzers aren't aware of the input structure and therefore only try random input, substitutions based on heuristics, delete parts of the input or add parts to the input. This can lead to a lot of test cases, which don't lead anywhere. Another disadvantage is that input generated by a dumb fuzzer may easily be dismissed if a specific input structure is expected. Looking at smart fuzzers, which try to generate valid input based on the softwares protocol\cite{boofuzz}, grammar\cite{grammarinator} or model\cite{modelfuzz}. %\textbf{describe grammar, protocol, model}
 To perform smart fuzzing the input model must be provided to the fuzzer, which may not be as easily accessible on proprietary devices, although there are ways to derive an input model from a large sample of valid and invalid input.
 \section{Intricacies of IoT Fuzzing}
 IoT Fuzzing is the application of fuzzers on IoT devices, which poses new challenges, since fuzzing hardware and their firmware and fuzzing software work on different domains, each with their own challenges.
-The works of Muench et al.\cite{crash} offers insight into the challenges of fuzzing embedded devices. They mention three main challenges.
+% maybe also include some feedback, e.g. grey box fuzzing?
-% 
+\begin{figure}
-The first challenge being fault detection. Regular fuzzing assumes that crashes are generally observable. Due to an IoT devices limited computational capability fault detection functionalities, usually present in multi-purpose devices, are rarely present in embedded systems.
+  \begin{center}
-% 
+    \includegraphics[width=\linewidth]{fuzzing2}
-Even when crash causing fault detection mechanisms are available, they would be logged on multi-purpose systems while embedded devices usually do not provide feedback like multi-purpose systems do due to the lack the necessary I/O capabilities.
+  \end{center}
-% 
+  \caption{Fuzzing with AFL\cite{afl}.}\label{fig:fuzzing}
-A liveness check, also called probing, can be performed to check the status of the device while fuzzing it. Probing can be either active and passive. During active probing the fuzzer sends regular known to be valid messages to the target system and evaluates the response. The messages sent by the fuzzer may cause a state change in the tested device, which has to be accounted for. Passive probing uses the devices responses to the test message to determine liveness or observes visible crashes.
+\end{figure}
-Muench et al.\cite{crash} expands on this by classifying system crashes by their observability. An observable crash is therefore the most visible and managable kind of crash, where the tested device stops running and provides an error message or another 6 that is easily visible. It is added that this also includes crashes, which don't provide additional information about the crash. Observable crashes are the optimal case regarding crashes, since they are visible and enable the fuzzer or tester to react without delay.
+Advantages of fuzzing are the automation and scalability of the process. This enables fuzzing to run many test cases in a short amount of time, which makes throughput of the fuzzer an important metric in evaluating fuzzers\cite{angora}. This is achieved by easily being able to run software concurrently on multiple processors. An alternative way is running the software in a virtual environment\cite{crash} and executing the virtual environments concurrently. 
 \section{Challenges of IoT Fuzzing}
 IoT fuzzing is the application of fuzzing techniques on IoT devices. This approach poses new challenges, since fuzzing hardware and its firmware and fuzzing software operate on different domains, which have each their own challenges.
 Muench et al.\cite{crash} describe the main challenges:
 %
-Reboots are another kind of crash. Crashes on T3 devices automatically lead to a reboot, since the crashed software and firmware on the device are part of the same ``blob firmware''. On other kinds of devices, a service may crash while the rest of the system continues to run without problems.
+The first challenge, fault detection, is about the complexity of observing crashes on IoT devices during the fuzzing process. Fuzzing regular software may already yield unobservable unexpected behaviour. Working with IoT devices adds another layer to this problem, since IoT devices do not have the same I/O capabilities and memory protection measures as a multi-purpose system does.
 %
-In reaction to malformed input a device may hang. That means that it halts execution and doesn't react to any more input. This may be due to being stuck in an infinite loop. This leads to a slowdown in throughput and the device needs to be restarted if such behavior is found.
+The second challenge in IoT fuzzing is the performance and scalability of the fuzzing process. Running a regular fuzzer concurrently on multiple processes rarely poses a challenge. When fuzzing IoT devices, either multiple copies of the same device have to be bought to create a comparable scenario, which is often infeasible, or emulation has to be utilized, which poses its own multitude of challenges.
 % 
 Late Crashes pose a challenge for testing the device. This behavior is described as the device crashing after a non-negligible amount of time after the cause of the crash, like a malformed message, is sent, which makes correlation between the cause and the crash challenging.
 % 
 At last there are cases where neither the device nor the software crash while still being in an unexpected state. This can lead to wrong data and incorrect results. This malfunctioning of the device is hard to detect, since the fuzzer needs information about the expected response to determine whether it is an output caused by a malfunction or not. This is further complicated due to the diverse message formats in use\cite{snipuzz}.
 %
 The third challenge is instrumentation, which is used in non-black box fuzzing approaches to collect code coverage information and detect subtle memory corruptions. There are multiple approaches to add instrumentation to a program or its environment for regular fuzzing. A challenge of adding instrumentation to IoT devices is the need to often use static and dynamic analysis to imitate the functionalities of instrumentation, since instrumentation often cannot be directly applied to the IoT device. The reasons for this will be explained later.
 (1) The challenge of fault detection on IoT devices means that memory corruptions in the device caused by IoT fuzzers can often go unnoticed since they do not necessarily lead to crashes. Protection measures on multi-purpose systems detect memory corruptions on the system caused by fuzzing and cause a crash, thereby making memory corruptions visible to regular fuzzers. Such measures are rarely implemented on IoT devices due to limited computing resources\cite{crash}.
 A liveness check, also called probing, can be performed to check the status of the device while fuzzing it. Probing can either be active or passive. During active probing, the fuzzer sends regular, known-to-be-valid messages to the target system and evaluates the response. The messages sent by the fuzzer may cause a state change in the tested device, which has to be accounted for. Passive probing uses the device's responses to the test message to determine liveness or observes visible crashes.
 Muench et al.\cite{crash} expand on this by classifying system crashes by their observability. An observable crash is therefore the most visible and manageable kind of crash. During observable crashes, the device stops running and provides some form of error message or draws attention to the faulty behaviour in another way. It is added that this also includes crashes that do not provide additional information about the crash, such as error messages. Observable crashes are the optimal case among system crashes, since they are visible from the outside and enable the fuzzer or tester to react to the crash without delay.
 Reboots are another kind of crash. A crash-inducing error in a piece of software on an IoT device usually does not lead to a crash of the whole system, since they work independently of each other. In T3 devices, where the software and firmware are one and the same, a crash of an outward-facing service leads to a crash of the whole system.
 In reaction to malformed input, a device may hang. That means that it halts execution and does not react to any more input. This may be due to being stuck in an infinite loop. This leads to a slowdown in throughput of a fuzzer and the device needs to be restarted if such behaviour is found.
 Late Crashes pose a challenge for testing the device. This behaviour is described as the device crashing after a non-negligible amount of time after the real cause of the crash, like a malformed message, is sent. This makes correlation between the cause and the crash challenging.
 At last, there are cases where neither the device nor the software crash while still being in an unexpected state, which leads to wrong data or an incorrect output. This kind of malfunctioning of the device is hard to detect, since the fuzzer needs information about the expected response to determine whether it is an output caused by a malfunction or not. This is further complicated due to the diverse message formats used in IoT devices\cite{snipuzz}.
 There are also cases of malformed input not causing any visible effects, even when errors occurred. These errors may cause crashes or malfunctioning at a later time, which makes detecting them during fuzzing almost impossible without instrumentation\cite{snipuzz}.
-The second challenge is performance and scalability. While regular fuzzers can execute and test software concurrently to increase the throughput and therefore find more possible faults in the software over time. Fuzz testing on an IoT device is not possible in the same manner, since a physical device is being fuzzed. Even though multiple copies of the same device could be purchased, to scale up the test cases, it would become infeasible due to financial limitations and infrastructure requirements like power and space. Emulation can help with the problem of scalability by emulating the test device, but this approach faces the challenge of IoT devices being dependent on the hardware components of the device\cite{firmafl}. 
+(2) The second challenge is performance and scalability. While regular fuzzers can execute and test software concurrently to increase the throughput and therefore find more possible faults in the software over time, fuzz testing on an IoT device is not possible in the same manner, since a physical device is being fuzzed. Even though multiple copies of the same device could be purchased to scale up the test cases, it would become infeasible due to financial limitations and infrastructure requirements like power and space. Emulation can help with the problem of scalability by emulating the test device, but this approach faces the challenge of IoT devices being dependent on the hardware components of the device\cite{firmafl}.
 % 
 After a fuzzing loop the original state of the tested device has to be established to start every fuzzing attempt with the same starting conditions. This is not a challenge with regular software, since the softwares original state is reestablished after rerunning it and changes on the file system can be reverted with a e.g.\ snapshot of the virtual VM (virtual machine). Restarting an IoT device can take up to a minute, since the device needs to be completely rebooted to get it to a neutral state.
-The third challenge Muench et al.\cite{crash} mentions is the instrumentation. Instrumentation on desktop systems is used to obtain coverage information about the software that is being fuzzed and detect memory corruptions by adding them during compile time or run time.
+After a fuzzing loop, the original state of the tested device has to be re-established to start every fuzzing attempt under the same conditions. This is not a challenge with regular software, since the software's original state is re-established after rerunning it. Changes to the file system that were caused by the tested software can be easily reverted with e.g.\ a snapshot of the virtual machine running the test. To establish a clean testing condition on an IoT device without knowledge of its internals, the easiest method is restarting the device. This step can take up to a minute, which negatively affects the throughput of IoT fuzzers.
 % 
 Instrumentation being added during compile time therefore requires the firmware beforehand. This is already an issue on IoT devices, since acquiring the firmware is not always possible. Additionally the variety of operating systems and processor architectures, makes instrumentation on IoT devices a challenging task. Obtaining the manufacturers tool chain to re-compile the firmware is rarely possible. This could be solved by utilizing binary dynamic instrumentation frameworks like valgrind\cite{valgrind} or using QEMUs instrumentation\cite{triforceafl}, but these methods heavily depend on the OS and CPU architecture.
-Furthermore, IoT fuzzing suffers from the similar or the same problems as regular fuzzing based on the fuzzing approach. Therefore an IoT fuzzer which utilizes network protocol fuzzing will face the same challenges as the used network protocol fuzzer, like generating valid input\cite{diane}, on top of the aforementioned challenges of fuzzing an IoT device.
+(3) The third challenge Muench et al.\cite{crash} mention is instrumentation. On desktop systems, instrumentation is added at compile time or run time and is used to obtain coverage information about the software that is being fuzzed and to detect memory corruptions.
 % 
 % does instrumentation require emulation? How does instrumentation work if running the software on a dedicated iot device?
 Instrumentation being added during compile time therefore requires the firmware beforehand. This is already an issue on IoT devices, since acquiring the firmware is not always possible. Additionally, the variety of operating systems and processor architectures makes instrumentation on IoT devices a challenging task. Moreover, obtaining the manufacturer's tool chain to re-compile the firmware with instrumentation is rarely possible. A workaround for this problem could be the use of dynamic binary instrumentation frameworks like valgrind\cite{valgrind} or of QEMU's instrumentation\cite{triforceafl}, but these methods heavily depend on the OS and CPU architecture.
 % Tuan:???
 %Furthermore, IoT fuzzing suffers from the similar or the same problems as regular fuzzing based on the fuzzing approach. Therefore an IoT fuzzer which utilizes network protocol fuzzing will face the same challenges as the used network protocol fuzzer, like generating valid input\cite{diane}, on top of the aforementioned challenges of fuzzing an IoT device.
 \section{Overview of IoT Tools and Techniques}
-In this section, we are going to create an overview of different IoT fuzzers, list the techniques they utilize and look at the techniques advantages and disadvantages (\Cref{tab:toollist}).
+Here we give an overview of different IoT fuzzers and their techniques, and list their advantages and disadvantages (\Cref{tab:toollist}).
 \begin{table*}
  \begin{center}
-    \begin{tabularx}{\textwidth}{cccXX}
+    \begin{tabularx}{\textwidth}{cccX>{\hsize=.5\hsize}X}
      Tool                        & Technique & Target           & Fuzzing Techniques & Crash detection \\
      \toprule
      SIoTFuzzer\cite{siotfuzzer} & Black box  & Web Interface    & Stateful Message Generation* & Network Monitor\\\midrule
-      IoTFuzzer\cite{iotfuzzer}   & Blackbox  & Companion App*   & Generation\&Mutation\newline Taint analysis&Passive probing\\\midrule
+      IoTFuzzer\cite{iotfuzzer}   & Black box  & Companion App*   & Generation\newline Mutation\newline Taint analysis&Passive probing\\\midrule
      Firm-AFL\cite{firmafl}      & Grey box   & Firmware         & Mutation \newline Augmented Process Emulation* & Emulation\\\midrule
      Snipuzz\cite{snipuzz}       & Black box  & API & Snippet-based mutation*\newline & Network Monitor  \\\midrule
-      Firmcorn\cite{firmcorn}     & Greybox   & Firmware & Optimal virtual execution*\newline Vulnerability-oriented fuzzing*& Instrumentation \\\midrule%Memory corruption check \& Exception detection
+      Firmcorn\cite{firmcorn}     & Grey box   & Firmware & Optimal virtual execution*\newline Vulnerability-oriented fuzzing*& Instrumentation \\\midrule
-      % FirmFuzz\cite{firmfuzz}     & Greybox   & Web Interface & Generational\newline & \\ \midrule
+      Diane\cite{diane}           & Black box  & Companion App  & Under-constrained Input Generation* &  Passive probing\newline Active probing\\\midrule
      Diane\cite{diane}           & Blackbox  & Companion App  & Mutation &  Active probing\\\midrule
      HFuzz\cite{hfuzz}           & Grey box   & Network protocol & Message Structure Tree*& Instrumentation \\\midrule
-      % IoTHunter\cite{iothunter}   & Greybox   & Network protocol & Multi-level message generation* &                 \\
+      WMIFuzzer\cite{wmifuzzer}   & Black box  & Web Interface & Mutation\newline Generation & Network Monitor\\
      WMIFuzzer\cite{wmifuzzer}   & Blackbox  & Web Interface & Mutation & Network Monitor\\
      \bottomrule
    \end{tabularx}  
  \end{center}
  * = Novel technique in fuzzer
  \caption{An overview of different IoT fuzzing tools.}\label{tab:toollist}
 \end{table*}
-\subsection{Mutational fuzzing}
+
-Mutation based fuzzing is a method of input generation\cite{tfuzz}. Mutational fuzzing requires predefined messages to start the mutation on. These mutation can include e.g.\ bit flipping, checking for out of bound bugs, sending empty data or substituting parts of the message with random data\cite{snipuzz} to explore new program states or trigger unexpected behavior. 
+\subsection{Input Generation}
-\subsection{Generational fuzzer}
+\subsubsection{Mutational fuzzing}
-Generation based fuzzers create messages from scratch while being provided with the format specifications for the input. Creating such a format specification requires manual effort and may even be infeasible, especially if a format is not available\cite{tfuzz}. 
+Mutation based fuzzing is a method of input generation\cite{tfuzz}. Mutational fuzzing requires a set of predefined messages to start the mutation on. These mutations can include e.g.\ bit flipping, checking for out of bound bugs, sending empty data or substituting parts of the message with random data\cite{snipuzz} to explore new program states or trigger unexpected behaviour. This way the fuzzing process can get started easily with only a couple of so-called seed messages. A disadvantage of mutational fuzzing is the limited coverage. A mutational fuzzer can rarely generate input that deals with a target's complex sanity checks, since mutational input generation does not take the input format into account\cite{tfuzz}.
-\subsection{Snippet-based mutation}
+\subsubsection{Generational fuzzing}
 Generation based fuzzers create messages from scratch while being provided with the format specifications for the input. Creating such a format specification requires manual effort and may even be infeasible, especially if the format is not available\cite{tfuzz}. Srivastava et al.\cite{firmfuzz} attribute the performance of FirmFuzz to their generational approach to input generation, since the resulting constrained state space leads to a decreased overhead.
 \subsubsection{Under-constrained Input Generation}
 Under-constrained Input Generation is a technique utilized by the fuzzer DIANE.\@ Here a combination of static and dynamic analysis is used on the companion app to find functions that produce ``valid yet under-constrained'' inputs for the IoT device. The companion app's own functions are then used to generate input for the IoT device that is not constrained by the app and is structurally correct enough to not be discarded by the IoT device. The limitations of this approach lie in the implementation of the app analysis to find the desired functions. Additionally, since this is a black box approach to input generation, coverage is another issue.
 \subsubsection{Snippet-based mutation}
 Snippet-based mutation is a novel approach to input generation of Snipuzz\cite{snipuzz}. Snippet-based mutation is the application of the mutation-based fuzzing approach on snippets. Snippets are parts of messages, determined by a heuristic algorithm and hierarchical clustering. Those snippets are categorized by the response they trigger from the IoT device. Snippets are then used to build new messages to trigger new program states. This method of mutation and message generation creates messages, which more likely follow message or protocol constraints of IoT devices, which leads to more effective fuzzing. Since this mutation method is guided by the response of the tested device, detailed responses are required to accurately categorize snippets\cite{snipuzz}. 
-\subsection{Message Structure Tree}
+\subsubsection{Message Structure Tree}
-Message Structure Tree is a mutational fuzzing technique where the valid input is analyzed to create a tree structure based on heuristics to mutate single fields of the input\cite{hfuzz}. This way the grammar of the protocol can be derived without explicitly providing the input format.
+Message Structure Tree is a mutational fuzzing technique where the valid input is analysed to create a tree structure based on heuristics to mutate single fields of the input\cite{hfuzz}. This way, the grammar of the protocol can be derived without explicitly providing the input format.
-\subsection{Binary Rewriting/Instrumentation}
+\subsubsection{Stateful Message Generation}
 Binary rewriting can be used to add instrumentation to firmware\cite{crash}. Instrumentation can be used to e.g.\ add hooks to specific functions. This is interesting for fuzzing once instrumentation is added to internal exceptions to check for crashes or otherwise unexpected behavior\cite{firmcorn}. To perform binary rewriting disassembly of the firmware is necessary, which requires partial decompilation. An additional challenge is the missing room for additional instrumentation due to embedded devices being optimized for their memory usage\cite{crash}.
 \subsection{Full Emulation}
 Emulation tackles the problems of throughput and scalability in IoT fuzzing. This is done by improving the performance, success rate and hardware-independence of fuzzers\cite{snipuzz}. Full emulation of the firmware with the help of heuristics mitigates the lack of fault detection and increases accuracy of found vulnerabilities to the level of desktop system application fuzzers. Additionally, emulation-based fuzzing provides the possibility to repeat test cases and their executions to further analyze specific test runs\cite{panda}.
 % 
 Third-party developers often lack the details of the device needed to implement a good emulator. Building an emulator therefore requires a huge amount of manual effort\cite{crash}. This is due to IoT devices being heavily dependent on their hardware\cite{firmafl}.
 \subsection{Partial Emulation}
 Partial emulation can lead to accurate vulnerability detection with decreased performance in comparison to full emulation, but possibly better performance than fuzzing the physical device, since it makes the fuzzing process more scalable\cite{crash}. Partial emulation is done by only emulating parts of the firmware or its peripheral devices.
 \subsection{Augmented Process Emulation}
 This method of emulation is proposed and used by Firm-AFL\cite{firmafl}. Augmented process emulation utilizes both system-level emulation and user-mode emulation to increase execution speed of the tested firmware/software. System-level emulation is only used when necessary, since it slows down execution. Currently augmented process emulation is limited to firmware that can be emulated in a system emulator and runs a POSIX-compatible operating system.
 \subsection{Optimized Virtual Execution}
 This technique used by Firmcorn\cite{firmcorn} executes firmware instructions in a lightweight CPU emulator. This approach circumvents the overhead generated by full-system emulation. The execution is further optimized by using heuristic algorithms like omitting unnecessary functions to optimize the execution process. Additionally the optimized virtual execution uses information about the context of the firmware. 
 \subsection{Symbolic Execution}
 Symbolic execution is a technique used to increase code coverage by using symbols as input and tracking manipulations and comparisons of them during runtime\cite{avatar}. The usage of the input is then backtracked to solve the constraints of specific code branches if a desired state is reached. Symbolic execution has the problems of path explosion and constraint solving, which pose an obstacle to scalability\cite{angora}. Path explosion is the exponential increase in the number of code branches as the program grows. Possible infinite loops are a part of this problem. Constraint solving can pose another challenge, since depending on ``how deep'' the program's tracking goes, the calculation of the constraints of a specific branch can be complex.%\textbf{citation needed}.
 \subsection{Liveness Check}
 By checking for liveness a fuzzer can determine the state of an IoT device. This is done actively by sending regular heartbeat messages to the device or passively by checking for expected responses of the IoT device.
 % 
 Liveness checks may cause timeouts to be detected as crashes, which slows down fuzzing. Omitting active liveness checks improves performance, since no probing packets are sent; such packets make up a certain percentage of the traffic without contributing to the detection of vulnerabilities during the fuzzing process.
 \subsection{Taint analysis}
 Taint analysis is used to track data of interest during execution. The data that is being tracked is called taint source. IoTFuzzer\cite{iotfuzzer} uses taint analysis to track e.g.\ user input to find out which input influences network messages sent to the analyzed IoT device.
 \subsection{Stateful Message Generation}
 This technique was introduced by SIoTFuzzer\cite{siotfuzzer} which fuzzes web interfaces of IoT devices. Stateful Message Generation (SMG) is divided into three parts: front-end analysis, state analysis and seed generation. 
 % 
-SMG considers that communication depends on certain states and therefore groups together messages as a test case to fuzz the target system. So far SMG is only used to fuzz web interfaces in SIoTFuzzer\cite{siotfuzzer}.
+SMG considers that communication depends on certain states and therefore groups together messages as a test case to fuzz the target system. So far, SMG is only used to fuzz web interfaces in SIoTFuzzer\cite{siotfuzzer}.
-\subsection{Vulnerability-oriented fuzzing}
+
-Vulnerability-oriented fuzzing is used in Firmcorn\cite{firmcorn}. For this method, static analysis is used to find vulnerable code. Vulnerable code is determined by multiple factors like, complexity, number of memory operations and call to sensitive functions. 
+\subsection{Instrumentation}
-\subsection{Coverage-oriented fuzzing}
+\subsubsection{Binary Rewriting/Instrumentation}
-Coverage-oriented fuzzing generates input with the traversion of different execution paths in mind. This is done to maximize code coverage to reach paths which may be vulnerable. This is done by taking the ability of an input to trigger new paths into account\cite{vuzzer}.  While coverage guided fuzzing tries to maximize code coverage, usually most of a softwares code is not vulnerable, therefore a lot of resources are spent on exploring paths, which are not vulnerable. 
+Binary rewriting can be used to add instrumentation to firmware\cite{crash}. Instrumentation can be used to, e.g.\ add hooks to specific functions. This is interesting for fuzzing once instrumentation is added to internal exceptions to check for crashes or otherwise unexpected behaviour\cite{firmcorn}. To perform binary rewriting, disassembly of the firmware is necessary, which requires partial decompilation. An additional challenge is the missing room for additional instrumentation due to embedded devices being optimized for their memory usage\cite{crash}.
-\subsection{Directed fuzzing}
+\subsubsection{Symbolic Execution}
 A technique used to increase code coverage by using symbols as input and tracking manipulations and comparisons of them during runtime\cite{avatar}. The usage of the input is then backtracked to solve the constraints of specific code branches if a desired state is reached. Symbolic execution has the problems of path explosion and constraint solving, which pose an obstacle to scalability\cite{angora}. Path explosion is the exponential increase in the number of code branches as the program grows. Possible infinite loops are a part of this problem. Constraint solving can pose another challenge, since depending on ``how deep'' the program's tracking goes, the calculation of the constraints of a specific branch can be very complex\cite{vuzzer}.
 \subsubsection{Taint analysis}
 Taint analysis is used to track data of interest during execution. The data that is being tracked is called taint source. IoTFuzzer\cite{iotfuzzer} uses taint analysis to track, e.g.\ user input to find out which input influences network messages sent to the analysed IoT device.
 \subsection{Emulation}
 \subsubsection{Full Emulation}
 Emulation tackles the problems of throughput and scalability in IoT fuzzing. This is done by improving the performance, success rate and hardware independence of fuzzers\cite{snipuzz}. Full emulation of the firmware, with the help of heuristics, mitigates the lack of fault detection and increases accuracy of found vulnerabilities to a level of desktop system application fuzzers. Additionally, emulation based fuzzing provides the possibility to repeat test cases and their executions to further analyse specific test runs\cite{panda}.
 % 
 Often third party developers lack details of the device to implement a good emulator. This makes building an emulator require huge amounts of manual effort\cite{crash}, due to IoT devices' heavy dependence on their hardware\cite{firmafl}. Failing to emulate even a part of a device or its peripherals may lead to the firmware not running at all\cite{firmfuzz}. 
 \subsubsection{Partial Emulation}
 Partial emulation can lead to accurate vulnerability detection with decreased performance in comparison to full emulation but possibly better performance than fuzzing the physical device, since it makes the fuzzing process more scalable\cite{crash}. Partial emulation is done by only emulating parts of the firmware or its peripheral devices.
 \subsubsection{Augmented Process Emulation}
 This method of emulation is proposed and used by Firm-AFL\cite{firmafl}. Augmented process emulation utilizes both system-level emulation and user-mode emulation to increase execution speed of the tested firmware or software. Here system-level emulation is only used when necessary, due to its low speed, while user-mode emulation is used the rest of the time. This improves the overall throughput of fuzzers utilizing Augmented Process Emulation compared to fuzzers using emulators that only make use of system emulation. Currently, Augmented Process Emulation is limited to firmware that can be emulated in a system emulator and runs a POSIX-compatible operating system.
 \subsubsection{Optimized Virtual Execution}
 This technique is used by Firmcorn\cite{firmcorn}, where the firmware instructions are executed in a lightweight CPU emulator. This approach circumvents the overhead generated by full-system emulation. The execution is further optimized by using heuristic algorithms like omitting unnecessary functions to optimize the execution process.
 \subsection{Code Coverage}
 \subsubsection{Vulnerability-oriented fuzzing}
 Vulnerability-oriented fuzzing is used in Firmcorn\cite{firmcorn}. For this method, static analysis is used to find vulnerable code. Vulnerable code is determined by multiple factors like complexity, number of memory operations and calls to sensitive functions. Those attributes are calculated based on information about the target's control flow, like the number of edges of a function or the cyclomatic complexity of a function.
 \subsubsection{Coverage-oriented fuzzing}
 Coverage-oriented fuzzing generates input with the traversal of different execution paths in mind. This is done to maximize code coverage to reach paths which may be vulnerable by taking the ability of an input to trigger new paths into account\cite{vuzzer}. While coverage guided fuzzing tries to maximize code coverage, usually most of a software's code is not vulnerable, therefore a lot of resources are spent on exploring invulnerable code paths.
 \subsubsection{Directed fuzzing}
 Directed fuzzing is the process of generating input with the goal of traversing specific execution paths\cite{vuzzer}. 
-Since only a fraction of firmware code has vulnerabilities the graybox approach to fuzzing by increasing code coverage leads to test cases, which end up not finding vulnerabilities\cite{firmcorn}.
+Since only a fraction of firmware code has vulnerabilities, the grey box approach to fuzzing by only focusing on code coverage leads to many test cases that end up not finding vulnerabilities\cite{firmcorn}.
 \subsection{Crash Detection}
 \subsubsection{Active Probing}
 Active probing is used to determine the state of the target by regularly sending messages to the target.
 The response of the target to such a message is known. Should the response deviate from the expected message or should the device not respond at all, it can be assumed that there is an error.
 While this probing method can detect errors that do not lead to crashes, the probing messages could lead to unexpected states of the target themselves. Sending additional messages to probe for the liveness of the target also slows down the overall fuzzing process, since such probing messages do not contribute to increasing the coverage of the target.
 \subsubsection{Passive Probing}
 During passive probing, the messages that are sent for fuzz testing are used to determine the state of the target.
 While the target device responds in an acceptable time window, it is assumed that no crash has occurred. 
 \section{Related Work}
 \subsection{Static Analysis}
 Alternatively to fuzzing, there are other ways to test software for vulnerabilities such as static firmware analysis. Static firmware analysis is the analysis of firmware without executing it by using tools like binwalk\cite{binwalk} to unpack the firmware and reverse engineering it with a reverse engineering tool like IDA\cite{IDA}\cite{firmup}. The advantage of static analysis is the possibility to automate and scale the processes of analysing the firmware\cite{largeanal}, since the testing does not depend on a physical device. On the other hand, static analysis also yields a high amount of false positives and may not find completely new vulnerabilities with the usage of its heuristics\cite{firmafl}. Another challenge during static analysis is the handling of packed or obfuscated code, since it first has to be unpacked or deobfuscated to perform meaningful analysis on it\cite{largeanal}. 
 \subsection{Dynamic Analysis}
 Dynamic Firmware analysis is another alternative to fuzzing. For dynamic analysis, the firmware is executed to be investigated. This can be done in a multitude of ways. For example, by running the firmware on the original device or emulating the device to have the firmware run in a virtual environment. The running firmware's behaviour is then analysed\cite{autodyn}. The challenge of working with packed or obfuscated code during static firmware analysis can be overcome  with dynamic analysis\cite{avatar} by emulating the physical device, which increases scalability and eliminates the need to acquire the physical device to test it\cite{autodyn}.
 \section{Conclusion}
-In this paper we created an overview of the different IoT fuzzing techniques used by state of the art IoT fuzzing tools and compared their approaches in regards to input generation, crash detection heuristics and their device scopes.
+In this paper we created an overview of the different IoT fuzzing techniques used by state-of-the-art IoT fuzzing tools and compared their approaches in regard to input generation, crash detection and their device scopes.
 % 
-The IoT fuzzer we looked at, utilized many techniques to make use of many attack surfaces and even used software outside the IoT devices themselves to gain information about the device, like IoTFuzzer\cite{iotfuzzer} which used the devices companion app to send fuzzing messages to the tested device.
+The IoT fuzzers we looked at utilized many techniques to make use of many attack surfaces and even used software outside the IoT devices themselves to gain information about the device, like IoTFuzzer\cite{iotfuzzer}.
 %
 There were also fuzzers, which did not create a new approach to fuzzing itself, but applied existing fuzzing techniques to the field of IoT fuzzing.
 All in all, there are many techniques used in the field of IoT fuzzing, including some that are even outside the field of conventional fuzzing, such as symbolic execution, which belongs more in the class of dynamic analysis techniques. This makes fuzzing a very diverse topic for research, in which there is a lot of room for improvement.
 \printbibliography{}
 \section{Appendix}
 \subsection{Reconnaissance}
 To gain information about the system, we start off with \textit{nmap}.
 The port scan revealed 6 open ports: 22 (ssh), 53 (dns), 80 (http), 443 (https), 5515 (unknown) and 65534 (unknown). Knowing that there was a backdoor service on this device, it was probably on either port 5515 or 65534, since those are not part of the IANA well-known ports.
 \begin{figure}[h]
  \begin{lstlisting}[language=sh,firstnumber=0,breaklines=true]
    nmap $TARGET -p-    
    nmap $TARGET -sV -sC -p22,53,80,443,5515, 65534
  \end{lstlisting}
  \caption{SYN scan all ports and detailed scan over open ports.}\label{lst:nmap}
 \end{figure}
 \subsection{Getting shell and adding user}
 Connecting with port 5515 via \textit{netcat} returned a root user shell.
 To add a user, I simply edited the \textit{/etc/shadow} and \textit{/etc/passwd} file by adding one entry in each file. The entry for the \textit{/etc/passwd}-file contained the username, uid etc.\ and the other one contained the username, md5 hashed password etc.\ for the \textit{/etc/shadow}-file.
 \begin{figure}[h]
  \begin{lstlisting}[language=sh,firstnumber=0,breaklines=true]
    echo "echo tuan:x:1001:1001::/root:/bin/ash >> /etc/passwd;exit" | nc -nv $TARGET 5515 
    echo 'echo tuan:\$1\$123456\$qqQvjw0PqIk7otmzNsUIN0:18145:0:99999:7::: >> /etc/shadow;exit' | nc -nv $TARGET 5515
  \end{lstlisting}
  \caption{Adding user ``tuan'' with the password ``password''.}\label{lst:useradd}
 \end{figure}
 To check whether the user was added correctly, I logged in via SSH with the new user.
 \subsection{SSH Brute-force}
 For brute forcing the SSH login for ``iotgoatuser'' I used \textit{hydra}.
 An alternative to brute forcing over ssh would be getting the \textit{/etc/shadow} and \textit{/etc/passwd} files and cracking the passwords of its users locally with tools like \textit{JohnTheRipper} or \textit{Hashcat}. This method would circumvent defence mechanisms like \textit{fail2ban}, although the usage of such defence mechanisms is unlikely on an IoT device.
 \begin{figure}[h]
  \begin{lstlisting}[language=sh,firstnumber=0,breaklines=true]
     hydra -l iotgoatuser -P ./data/passwords.txt ssh://TARGET -t 4 -f
  \end{lstlisting}
  \caption{Brute-force ssh}\label{lst:hydra}
 \end{figure}
 \subsection{MITM}
 When visiting the web-interface of the IoT device via Firefox, we are greeted with a warning that the certificate is self-signed. This poses a threat to the user, since self-signed certificates cannot be revoked and don't expire.
 If the certificate was somehow leaked, the integrity of the website could not be restored without replacing the certificate, which may not be easily done on an IoT device sold to hundreds or thousands of consumers. Self-signed certificates are used nonetheless on IoT devices, since they are easier to obtain and free of charge.
 To proceed with the testing, I had to simply press ``Accept the Risk and Continue'' in the browser.
 Logging in on the web interface with the credentials we obtained in the brute forcing step didn't seem to have worked.
 I then tried brute forcing the login form with Burp Suite's ``Intruder'' function and a wordlist from SecLists\footnote{\url{https://github.com/danielmiessler/SecLists}}, using the root-user and the iotgoatuser-user, which didn't work either.
 Using the backdoor, I then changed the root password to ``asdfasdf'', since the already existing password didn't seem easily crackable. Logging in with the new credentials worked.
 Looking at the \textit{luci}-directory in \textit{/usr/lib/lua/luci/} we found \textit{/usr/lib/lua/luci/controller/iotgoat/iotgoat.lua}, which lists the secret developer page under \textit{https://\$TARGET/cgi-bin/luci/admin/iotgoat/cmdinject}.
 \subsection{Static analysis}
 To start the static analysis, we first extract and unpack the filesystem by finding the filesystem in the firmware with \textit{binwalk}, extracting it with \textit{dd} and unpacking it with \textit{unsquashfs}. This gives us access to the whole file system of the IoT device we are analysing.
 \begin{figure}[h]
  \begin{lstlisting}[language=sh,firstnumber=0,breaklines=true]
      binwalk ./data/Syssec\ IoT\ Device.bin
      dd if=data/Syssec\ IoT\ Device.bin of=data/0x1F5A50 bs=1 skip=2054736 count=2813038
      unsquashfs data/0x1F5A50
  \end{lstlisting}
  \caption{Extracting and unpacking filesystem}\label{lst:static}
 \end{figure}
 To find the shadow and passwd files, we can simply run a \textit{find} command to look for them, or rely on the knowledge that they are usually located in the \textit{/etc/} directory.
 The same can be done for the certificate to find a certificate in \textit{/etc/ssl/certs/ca-certificates.crt}.
 \subsection{Write-up}
 The full write-up can be found here:
 \url{https://git.uni-due.de/sktatran/syssec-embedded-security-writeup/-/blob/main/writeup.org}
 \end{document}
--- a/paper.txt
+++ b/paper.txt