import React from "react";
import "./mcu_hal22.css";
import Popup from "reactjs-popup";
import {NavLink} from "react-router-dom";
import {Button} from "../../../../common";
import AtomOneDark from "react-syntax-highlighter/src/styles/hljs/atom-one-dark";
import SyntaxHighlighter from "react-syntax-highlighter";

import dma_arch_1 from "./dma_arch1.jpg";
import dma_arch_2 from "./dma_arch2.jpg";
import dma_m2m_setup_1 from "./dma_m2m_setup_1.jpg";
import dma_m2m_setup_1_irq from "./dma_m2m_setup_1_irq.jpg";
import dma_m2m_setup_2 from "./dma_m2m_setup_2.jpg";
import dma_logic from "./dma_logic.jpg";

/**
 * C listing shown in the "Firmware" section: declares the 1024-element
 * source and destination arrays and initialises them (source = i+1,
 * destination = 0).
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet.
 */
const loop_copy_setup = [
  "",
  "uint16_t src_vector[1024], dst_vector[1024];",
  "",
  "for(uint_fast16_t i = 0; i < 1024; ++i){",
  "  src_vector[i] = i+1;",
  "  dst_vector[i] = 0x0000;",
  "}",
  "",
].join("\n");

/**
 * C listing shown in the "Firmware" section: sets and then resets the PP0
 * pin back-to-back, used to measure the bare HAL GPIO toggle time.
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet.
 */
const gpio_toggle = [
  "",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_SET);",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_RESET);",
  "",
].join("\n");

/**
 * C listing shown in the "Firmware" section: copies the source array into
 * the destination with a plain for loop, bracketed by a PP0 set/reset so
 * the copy time can be measured, then clears the destination again.
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet.
 */
const loop_copy_cycle = [
  "",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_SET);",
  "",
  "for(uint_fast16_t i = 0; i < 1024; ++i)",
  "  dst_vector[i] = src_vector[i];",
  "",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_RESET);",
  "",
  "for(uint_fast16_t i = 0; i < 1024; ++i){",
  "  dst_vector[i] = 0x0000;",
  "}",
  "",
].join("\n");

/**
 * C listing shown in the "Firmware" section: copies the source array with
 * memcpy (2048 bytes), bracketed by a PP0 set/reset for timing, then clears
 * the destination again.
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet.
 */
const memcpy_test = [
  "",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_SET);",
  "memcpy(dst_vector, src_vector, 2048);",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_RESET);",
  "",
  "for(uint_fast16_t i = 0; i < 1024; ++i){",
  "  dst_vector[i] = 0x0000;",
  "}",
  "",
].join("\n");

/**
 * C listing shown in the "Firmware" section: the DMA1 channel 1
 * transfer-complete callback, which raises dma_flag and resets the PP0 pin.
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet; the listing body keeps its 4-space indent.
 */
const copy_irq = [
  "",
  "void DMA1_Channel1_TransferComplete(DMA_HandleTypeDef *DmaHandle){",
  "    dma_flag = 1;",
  "    HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_RESET);",
  "}",
  "",
].join("\n");

/**
 * C listing shown in the "Firmware" section: registers the transfer-complete
 * callback, clears dma_flag, sets PP0, starts the interrupt-driven
 * memory-to-memory transfer of 1024 items and busy-waits on dma_flag.
 *
 * The empty first and last entries produce the leading and trailing
 * newline of the rendered snippet.
 */
const copy_dma = [
  "",
  "HAL_DMA_RegisterCallback(&hdma_memtomem_dma1_channel1, HAL_DMA_XFER_CPLT_CB_ID, DMA1_Channel1_TransferComplete);",
  "dma_flag = 0;",
  "HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_SET);",
  "HAL_DMA_Start_IT(&hdma_memtomem_dma1_channel1, (uint32_t)&src_vector, (uint32_t) &dst_vector, 1024);",
  "while(!dma_flag){}",
  "",
].join("\n");

/**
 * C listing shown in the "Parallel Digital output using DMA" section:
 * defines the 16-byte output pattern, resets PP0..PP3 and STRB, waits for a
 * press/release of button B1, then streams the pattern to GPIOC->ODR via
 * DMA2 channel 1 in interrupt mode and busy-waits on dma_flag.
 *
 * Two fixes compared with the previous listing, both mirroring the DMA1
 * snippet shown earlier on the page:
 *  - the source address is cast to uint32_t, because HAL_DMA_Start_IT takes
 *    integer addresses and a bare pointer is an implicit pointer-to-integer
 *    conversion;
 *  - dma_flag is cleared before the transfer starts, otherwise a flag left
 *    set by an earlier transfer makes the final wait fall through at once.
 */
const gpio_setup = `
uint8_t parallel_reg[16] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
                              0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF};

HAL_GPIO_WritePin(PP0_GPIO_Port, PP0_Pin, GPIO_PIN_RESET);
HAL_GPIO_WritePin(PP1_GPIO_Port, PP1_Pin, GPIO_PIN_RESET);
HAL_GPIO_WritePin(PP2_GPIO_Port, PP2_Pin, GPIO_PIN_RESET);
HAL_GPIO_WritePin(PP3_GPIO_Port, PP3_Pin, GPIO_PIN_RESET);
HAL_GPIO_WritePin(STRB_GPIO_Port, STRB_Pin, GPIO_PIN_RESET);

while(HAL_GPIO_ReadPin(B1_GPIO_Port, B1_Pin) != GPIO_PIN_SET){}
HAL_Delay(10);
while(HAL_GPIO_ReadPin(B1_GPIO_Port, B1_Pin) != GPIO_PIN_RESET){}
HAL_DMA_RegisterCallback(&hdma_memtomem_dma2_channel1, HAL_DMA_XFER_CPLT_CB_ID, DMA2_Channel1_TransferComplete);
dma_flag = 0;
HAL_GPIO_WritePin(STRB_GPIO_Port, STRB_Pin, GPIO_PIN_SET);
HAL_DMA_Start_IT(&hdma_memtomem_dma2_channel1, (uint32_t)parallel_reg, (uint32_t) &GPIOC->ODR, 16);
while(!dma_flag){}
`;


function MCU_HAL22(){
    return(
        <div className="em__post">
            <div className="em__post-title">
                <h1>Direct memory access</h1>
            </div>

            <div className="em__post-section">
                <h3>Aim of this tutorial:</h3>
                <p>
                    In this tutorial, we will take a closer look at the direct memory access (DMA) of the STM32G4 MCU.
                    In particular, we will use the memory-to-memory functionality to copy data from a memory location to another,
                    and to create a fast parallel digital interface.
                </p>
            </div>

            <div className="em__post-section">
                <h3>About DMA in general:</h3>
                <p>
                    A DMA unit is a logic element that can be used alongside the processor to offload memory operations.
                    The DMA can transfer data from memory regions to periphery or to other memory regions (and vice versa) reducing the CPU load.
                </p>
                <p>
                    In architectures without DMA (like <a href="https://binaryupdates.com/interview-question-answers-based-on-8051-microcontroller/">this</a>,
                    or <a href="https://www.cnx-software.com/2016/04/06/pulpino-open-source-risc-v-mcu-is-designed-for-iot-and-wearables/"> this</a>),
                    the CPU is connected to the RAM and to the peripherals using a single bus system.
                    Here, the CPU has to do all the fetching, decoding and execution of the instructions that reside in the instruction memory.
                    Moreover, it also has to move data to and from peripherals and memory.
                    If we are adding interrupts into the system, the huge amount of context switching wastes precious instruction cycles.
                </p>

                <Popup trigger={<img className="img_dma_arch_2_hal22 clickable_img" src={dma_arch_2} alt="dma_arch_2_hal22"/>} modal nested>
                    {close => (
                        <img className="em__img_full" src={dma_arch_2} alt="dma_arch_2_hal22" />
                    )}
                </Popup>
                <p>
                    Now consider the DMA unit, which has direct connection to the main internal bus (AHB1 in our case),
                    and thus direct access to the RAM, and peripherals.
                    This parallel connection between DMA and CPU creates opportunities for acceleration.

                    Of course, there are some downsides too. If the CPU has cache memory, when the DMA accesses a memory region mirrored in the cache,
                    it will invalidate the cache data after a write operation.
                </p>

                <p>
                    In STM32G4, the DMA controller is a single AHB master and has access to memory-mapped devices such as Flash, SRAM, AHB or APB peripherals.
                    All DMA channels are associated with peripheral request or software trigger-able transfer signals.
                    Every request has a separately programmable priority.
                    The transfer size of source and destination memory are independent (byte, half-word, word),
                    but source and destination addresses must be aligned on the data size.
                    The number of transferable data can be programmed from 0 to 65535.
                    The DMA controller can generate transfer complete, half transfer or transfer error interrupts.
                </p>

                <Popup trigger={<img className="img_dma_arch_1_hal22 clickable_img" src={dma_arch_1} alt="dma_arch_1_hal22"/>} modal nested>
                    {close => (
                        <img className="em__img_full" src={dma_arch_1} alt="dma_arch_1_hal22" />
                    )}
                </Popup>

                <p>
                    Here we can see a closeup of the two DMA modules implemented in the STM32G4 architecture.
                </p>
            </div>

            <div className="em__post-section">
                <h3>Measuring DMA Mem2Mem transfer speed - Hardware:</h3>
                <p>
                    So far we have used the DMA in memory-to-peripheral and peripheral-to-memory mode when we were talking about the ADC, DAC and UART.
                    This time, we will use the memory-to-memory functionality, which enables us great transfer speeds. Let's measure that 'great' speed.
                    <br/>
                    Create a new project with the usual settings and 170MHz system clock. Go to <i>System Core</i>/<i>DMA</i>, add a new <i>MEMTOMEM</i> item.
                </p>
                <img className="img_dma_m2m_setup_1_hal22" src={dma_m2m_setup_1} alt="dma_m2m_setup_1_hal22"/>
                <p>
                    The mode should be Normal, the Data width half world, and both the source and destination memory addresses must be incremented,
                    otherwise we would either copy the first source data, or we would overwrite the same destination register.
                </p>
                <img className="img_dma_m2m_setup_1_irq_hal22" src={dma_m2m_setup_1_irq} alt="dma_m2m_setup_1_irq_hal22"/>
                <p>
                    Enable the proper DMA interrupt, and generate the project.
                </p>
            </div>

            <div className="em__post-section">
                <h3>Measuring DMA Mem2Mem transfer speed - Firmware:</h3>
                <p>
                    Let's create two large arrays for the copy measurements.
                </p>
                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {loop_copy_setup}
                </SyntaxHighlighter>
                <p>
                    Next we can measure the time of a simple HAL GPIO toggle.
                </p>

                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {gpio_toggle}
                </SyntaxHighlighter>

                <p>
                    Upload this code and measure the toggle time using a logic analyser or an oscilloscope hooked onto CN7/34 pin.
                    In my case, using 170MHz and HAL, the toggle time was 83ns.
                </p>

                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {loop_copy_cycle}
                </SyntaxHighlighter>

                <p>
                    Now we can measure the copy time of 1024 elements using a single for loop.
                    In my case, this operation was done in 60.25us, so the overall transfer time is 60.17us.
                    <br/>
                    Another method for data copy is the <i>memcpy</i> from <i>sting.h</i>, so let's test that one too:
                </p>
                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {memcpy_test}
                </SyntaxHighlighter>
                <p>
                    Here you can see that the size is 2048, because the memcpy works with bytes, and our data has half-word width (16b), so twice.
                    The measured execution time was 7.625us, so the overall transfer time is 7.542us - 12% of the previous time.
                    <br/>
                    Now we can try to use the DMA for memory copy.
                </p>
                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {copy_irq}
                </SyntaxHighlighter>
                <p>
                    First we create the interrupt callback, in which we can signal the main context that the copy is completed.
                    This time though, we have to specify that this is the function that needs to be called.
                </p>
                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {copy_dma}
                </SyntaxHighlighter>
                <p>
                    <i>HAL_DMA_RegisterCallBack</i> registers our functions to the specified interrupt of the specified DMA channel.
                    <i>HAL_DMA_Start_IT</i> starts the copy procedure, and we wait for the completion flag.
                    The time between the GPIO state changes was 38.167us, so the execution time is 38.08us.
                    This is faster than the first method but slower than the memcpy method.

                    Although the execution time is five times greater than that of the memcpy,
                    the memcpy method is a blocking function, while the DMA is non-blocking,
                    so we can execute some code until the copy procedure is finished.
                </p>
            </div>

            <div className="em__post-section">
                <h3>Parallel Digital output using DMA:</h3>
                <p>
                    Now that we saw how to work with DMA based memory copy, we can look at how to apply the same technique to memory mapped peripheral.
                    For this, I opted for DMA2.
                </p>
                <img className="img_dma_m2m_setup_2_hal22" src={dma_m2m_setup_2} alt="dma_m2m_setup_2_hal22"/>
                <p>
                    Here we need a DMA access in normal mode with byte data width (we will be using 4b so this is the smallest size).
                    Also, don't forget to uncheck the destination address increment.
                    We don't want to increment the GPIO address, that could put the address pointer into an unwanted region.
                    Also, configure PC0..3 to digital outputs (I named them PP0..3 - from parallel port).
                </p>
                <SyntaxHighlighter language="c" style={AtomOneDark}>
                    {gpio_setup}
                </SyntaxHighlighter>
                <p>
                    Here I have output array with 16 elements, we want this data to appear on the output port.
                    First we reset every pin, then we are waiting for a button event.
                    The rest of the code is the same as before: register a callback function for the DMA in which you reset the control pin.
                    Set the control pin, then start the DMA transfer using IT mode. This time, the destination is the <i>GPIOC->ODR</i> address,
                    which sets the output of the PC GPIO.
                </p>
                <img className="img_dma_logic_hal22" src={dma_logic} alt="dma_logic_hal22"/>
                <p>
                    With a frequency of 12MHz we achieved a communication speed of 6MBps.
                    Of course, there should be a sync or read/write pulse so that the other device can know when to sample the data.
                    This could be done using a Timer module with output compare and DMA.
                    <br/>
                    The other thing that we see is that the STRB signal set time and the start of the transmission has a huge delay.
                    Same in the case of the reset.
                    This is the price of the context switching and the HAL functions.
                </p>
                <p>
                    Either way, the code for this tutorial can be found in <a href="https://gitlab.com/stm32_mcu_group/stm32_hal/22_dma_mem2mem.git">this</a> repo.
                </p>
            </div>



            <div className="em__post-navigation">

                <NavLink to="./../stm-hal-21">
                    <Button btnID={"leftBTN"} buttonSize="btn--medium"> Previous Post</Button>
                </NavLink>

                <NavLink to="./../stm-hal-23">
                    <Button btnID={"rightBTN"} buttonSize="btn--medium"> Next Post</Button>
                </NavLink>
            </div>
        </div>
    );
}

export default MCU_HAL22;