1. /**
    
  2.  * Copyright (c) Meta Platforms, Inc. and affiliates.
    
  3.  *
    
  4.  * This source code is licensed under the MIT license found in the
    
  5.  * LICENSE file in the root directory of this source tree.
    
  6.  */
    
  7. 
    
  8. #include "hardware-counter.h"
    
  9. 
    
  10. #ifndef NO_HARDWARE_COUNTERS
    
  11. 
    
  12. #define _GNU_SOURCE 1
    
  13. #include <stdio.h>
    
  14. #include <stdlib.h>
    
  15. #include <string.h>
    
  16. #include <unistd.h>
    
  17. #include <fcntl.h>
    
  18. #include <errno.h>
    
  19. #include <assert.h>
    
  20. #include <sys/mman.h>
    
  21. #include <sys/ioctl.h>
    
  22. #include <asm/unistd.h>
    
  23. #include <sys/prctl.h>
    
  24. #include <linux/perf_event.h>
    
  25. 
    
  26. namespace HPHP {
    
  27. ///////////////////////////////////////////////////////////////////////////////
    
  28. 
    
  29. IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter,
    
  30.     HardwareCounter::s_counter);
    
  31. 
    
  32. static bool s_recordSubprocessTimes = false;
    
  33. static bool s_profileHWEnable;
    
  34. static std::string s_profileHWEvents;
    
  35. 
    
  36. static inline bool useCounters() {
    
  37. #ifdef VALGRIND
    
  38.   return false;
    
  39. #else
    
  40.   return s_profileHWEnable;
    
  41. #endif
    
  42. }
    
  43. 
    
  44. class HardwareCounterImpl {
    
  45. public:
    
  46.   HardwareCounterImpl(int type, unsigned long config,
    
  47.                       const char* desc = nullptr)
    
  48.     : m_desc(desc ? desc : ""), m_err(0), m_fd(-1), inited(false) {
    
  49.     memset (&pe, 0, sizeof (struct perf_event_attr));
    
  50.     pe.type = type;
    
  51.     pe.size = sizeof (struct perf_event_attr);
    
  52.     pe.config = config;
    
  53.     pe.inherit = s_recordSubprocessTimes;
    
  54.     pe.disabled = 1;
    
  55.     pe.pinned = 0;
    
  56.     pe.exclude_kernel = 0;
    
  57.     pe.exclude_hv = 1;
    
  58.     pe.read_format =
    
  59.       PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING;
    
  60.     }
    
  61. 
    
  62.   ~HardwareCounterImpl() {
    
  63.     close();
    
  64.   }
    
  65. 
    
  66.   void init_if_not() {
    
  67.     /*
    
  68.      * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
    
  69.      *                 int cpu, int group_fd, unsigned long flags)
    
  70.      */
    
  71.     if (inited) return;
    
  72.     inited = true;
    
  73.     m_fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
    
  74.     if (m_fd < 0) {
    
  75.       // Logger::Verbose("perf_event_open failed with: %s",
    
  76.       //                 folly::errnoStr(errno).c_str());
    
  77.       m_err = -1;
    
  78.       return;
    
  79.     }
    
  80.     if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    
  81.       // Logger::Warning("perf_event failed to enable: %s",
    
  82.       //                 folly::errnoStr(errno).c_str());
    
  83.       close();
    
  84.       m_err = -1;
    
  85.       return;
    
  86.     }
    
  87.     reset();
    
  88.   }
    
  89. 
    
  90.   int64_t read() {
    
  91.     uint64_t values[3];
    
  92.     if (readRaw(values)) {
    
  93.       if (!values[2]) return 0;
    
  94.       int64_t value = (double)values[0] * values[1] / values[2];
    
  95.       return value + extra;
    
  96.     }
    
  97.     return 0;
    
  98.   }
    
  99. 
    
  100.   void incCount(int64_t amount) {
    
  101.     extra += amount;
    
  102.   }
    
  103. 
    
  104.   bool readRaw(uint64_t* values) {
    
  105.     if (m_err || !useCounters()) return false;
    
  106.     init_if_not();
    
  107. 
    
  108.     if (m_fd > 0) {
    
  109.       /*
    
  110.        * read the count + scaling values
    
  111.        *
    
  112.        * It is not necessary to stop an event to read its value
    
  113.        */
    
  114.       auto ret = ::read(m_fd, values, sizeof(*values) * 3);
    
  115.       if (ret == sizeof(*values) * 3) {
    
  116.         values[0] -= reset_values[0];
    
  117.         values[1] -= reset_values[1];
    
  118.         values[2] -= reset_values[2];
    
  119.         return true;
    
  120.       }
    
  121.     }
    
  122.     return false;
    
  123.   }
    
  124. 
    
  125.   void reset() {
    
  126.     if (m_err || !useCounters()) return;
    
  127.     init_if_not();
    
  128.     extra = 0;
    
  129.     if (m_fd > 0) {
    
  130.       if (ioctl (m_fd, PERF_EVENT_IOC_RESET, 0) < 0) {
    
  131.         // Logger::Warning("perf_event failed to reset with: %s",
    
  132.         //                 folly::errnoStr(errno).c_str());
    
  133.         m_err = -1;
    
  134.         return;
    
  135.       }
    
  136.       auto ret = ::read(m_fd, reset_values, sizeof(reset_values));
    
  137.       if (ret != sizeof(reset_values)) {
    
  138.         // Logger::Warning("perf_event failed to reset with: %s",
    
  139.         //                 folly::errnoStr(errno).c_str());
    
  140.         m_err = -1;
    
  141.         return;
    
  142.       }
    
  143.     }
    
  144.   }
    
  145. 
    
  146. public:
    
  147.   std::string m_desc;
    
  148.   int m_err;
    
  149. private:
    
  150.   int m_fd;
    
  151.   struct perf_event_attr pe;
    
  152.   bool inited;
    
  153.   uint64_t reset_values[3];
    
  154.   uint64_t extra{0};
    
  155. 
    
  156.   void close() {
    
  157.     if (m_fd > 0) {
    
  158.       ::close(m_fd);
    
  159.       m_fd = -1;
    
  160.     }
    
  161.   }
    
  162. };
    
  163. 
    
  164. class InstructionCounter : public HardwareCounterImpl {
    
  165. public:
    
  166.   InstructionCounter() :
    
  167.     HardwareCounterImpl(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) {}
    
  168. };
    
  169. 
    
  170. class LoadCounter : public HardwareCounterImpl {
    
  171. public:
    
  172.   LoadCounter() :
    
  173.     HardwareCounterImpl(PERF_TYPE_HW_CACHE,
    
  174.         (PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8))) {}
    
  175. };
    
  176. 
    
  177. class StoreCounter : public HardwareCounterImpl {
    
  178. public:
    
  179.   StoreCounter() :
    
  180.     HardwareCounterImpl(PERF_TYPE_HW_CACHE,
    
  181.         PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8)) {}
    
  182. };
    
  183. 
    
  184. HardwareCounter::HardwareCounter()
    
  185.   : m_countersSet(false) {
    
  186.   m_instructionCounter.reset(new InstructionCounter());
    
  187.   if (s_profileHWEvents.empty()) {
    
  188.     m_loadCounter.reset(new LoadCounter());
    
  189.     m_storeCounter.reset(new StoreCounter());
    
  190.   } else {
    
  191.     m_countersSet = true;
    
  192.     setPerfEvents(s_profileHWEvents);
    
  193.   }
    
  194. }
    
  195. 
    
  196. HardwareCounter::~HardwareCounter() {
    
  197. }
    
  198. 
    
  199. void HardwareCounter::Init(bool enable, const std::string& events,
    
  200.                            bool subProc) {
    
  201.   s_profileHWEnable = enable;
    
  202.   s_profileHWEvents = events;
    
  203.   s_recordSubprocessTimes = subProc;
    
  204. }
    
  205. 
    
  206. void HardwareCounter::Reset() {
    
  207.   s_counter->reset();
    
  208. }
    
  209. 
    
  210. void HardwareCounter::reset() {
    
  211.   m_instructionCounter->reset();
    
  212.   if (!m_countersSet) {
    
  213.     m_storeCounter->reset();
    
  214.     m_loadCounter->reset();
    
  215.   }
    
  216.   for (unsigned i = 0; i < m_counters.size(); i++) {
    
  217.     m_counters[i]->reset();
    
  218.   }
    
  219. }
    
  220. 
    
  221. int64_t HardwareCounter::GetInstructionCount() {
    
  222.   return s_counter->getInstructionCount();
    
  223. }
    
  224. 
    
  225. int64_t HardwareCounter::getInstructionCount() {
    
  226.   return m_instructionCounter->read();
    
  227. }
    
  228. 
    
  229. int64_t HardwareCounter::GetLoadCount() {
    
  230.   return s_counter->getLoadCount();
    
  231. }
    
  232. 
    
  233. int64_t HardwareCounter::getLoadCount() {
    
  234.   return m_loadCounter->read();
    
  235. }
    
  236. 
    
  237. int64_t HardwareCounter::GetStoreCount() {
    
  238.   return s_counter->getStoreCount();
    
  239. }
    
  240. 
    
  241. int64_t HardwareCounter::getStoreCount() {
    
  242.   return m_storeCounter->read();
    
  243. }
    
  244. 
    
  245. void HardwareCounter::IncInstructionCount(int64_t amount) {
    
  246.   s_counter->m_instructionCounter->incCount(amount);
    
  247. }
    
  248. 
    
  249. void HardwareCounter::IncLoadCount(int64_t amount) {
    
  250.   if (!s_counter->m_countersSet) {
    
  251.     s_counter->m_loadCounter->incCount(amount);
    
  252.   }
    
  253. }
    
  254. 
    
  255. void HardwareCounter::IncStoreCount(int64_t amount) {
    
  256.   if (!s_counter->m_countersSet) {
    
  257.     s_counter->m_storeCounter->incCount(amount);
    
  258.   }
    
  259. }
    
  260. 
    
  261. struct PerfTable perfTable[] = {
    
  262.   /* PERF_TYPE_HARDWARE events */
    
  263. #define PC(n)    PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
    
  264.   { "cpu-cycles",              PC(CPU_CYCLES)              },
    
  265.   { "cycles",                  PC(CPU_CYCLES)              },
    
  266.   { "instructions",            PC(INSTRUCTIONS)            },
    
  267.   { "cache-references",        PC(CACHE_REFERENCES)        },
    
  268.   { "cache-misses",            PC(CACHE_MISSES)            },
    
  269.   { "branch-instructions",     PC(BRANCH_INSTRUCTIONS)     },
    
  270.   { "branches",                PC(BRANCH_INSTRUCTIONS)     },
    
  271.   { "branch-misses",           PC(BRANCH_MISSES)           },
    
  272.   { "bus-cycles",              PC(BUS_CYCLES)              },
    
  273.   { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) },
    
  274.   { "stalled-cycles-backend",  PC(STALLED_CYCLES_BACKEND)  },
    
  275. 
    
  276.   /* PERF_TYPE_HW_CACHE hw_cache_id */
    
  277. #define PCC(n)   PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
    
  278.   { "L1-dcache-",          PCC(L1D)                },
    
  279.   { "L1-icache-",          PCC(L1I)                },
    
  280.   { "LLC-",                PCC(LL)                 },
    
  281.   { "dTLB-",               PCC(DTLB)               },
    
  282.   { "iTLB-",               PCC(ITLB)               },
    
  283.   { "branch-",             PCC(BPU)                },
    
  284. 
    
  285.   /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
    
  286. #define PCCO(n, m)  PERF_TYPE_HW_CACHE, \
    
  287.                     ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
    
  288.                     (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
    
  289.   { "loads",               PCCO(READ, ACCESS)      },
    
  290.   { "load-misses",         PCCO(READ, MISS)        },
    
  291.   { "stores",              PCCO(WRITE, ACCESS)     },
    
  292.   { "store-misses",        PCCO(WRITE, MISS)       },
    
  293.   { "prefetches",          PCCO(PREFETCH, ACCESS)  },
    
  294.   { "prefetch-misses",     PCCO(PREFETCH, MISS)    }
    
  295. };
    
  296. 
    
  297. static int findEvent(const char *event, struct PerfTable *t,
    
  298.                      int len, int *match_len) {
    
  299.   int i;
    
  300. 
    
  301.   for (i = 0; i < len; i++) {
    
  302.     if (!strncmp(event, t[i].name, strlen(t[i].name))) {
    
  303.       *match_len = strlen(t[i].name);
    
  304.       return i;
    
  305.     }
    
  306.   }
    
  307.   return -1;
    
  308. }
    
  309. 
    
  // Fields of the processor signature returned in EAX by CPUID leaf 1.
  #define CPUID_STEPPING(x)   ((x) & 0xf)
  #define CPUID_MODEL(x)      (((x) & 0xf0) >> 4)
  #define CPUID_FAMILY(x)     (((x) & 0xf00) >> 8)
  #define CPUID_TYPE(x)       (((x) & 0x3000) >> 12)
  #define CPUID_EXT_MODEL(x)  (((x) & 0xf0000) >> 16)

  /*
   * Returns true when `signature` (EAX of CPUID leaf 1) is family 6,
   * model 0x2D, stepping 6, processor type 0.
   *
   * For family 6 the model number is the extended-model field shifted left
   * by four, combined with the base model field. Comparing the base field
   * alone would accept every family-6 part whose model merely ends in 0xD.
   */
  static inline bool isE5_2670Signature(uint32_t signature) {
    const uint32_t model =
      (CPUID_EXT_MODEL(signature) << 4) | CPUID_MODEL(signature);
    return CPUID_STEPPING(signature) == 6 && model == 0x2d
           && CPUID_FAMILY(signature) == 6 && CPUID_TYPE(signature) == 0;
  }

  // hack to get LLC counters on perflab frc machines
  //
  // Executes CPUID leaf 1 on x86-64 and checks the resulting signature;
  // always false on other architectures.
  static bool isIntelE5_2670() {
  #ifdef __x86_64__
    unsigned long x;
    asm volatile ("cpuid" : "=a"(x): "a"(1) : "ebx", "ecx", "edx");
    return isE5_2670Signature(static_cast<uint32_t>(x));
  #else
    return false;
  #endif
  }
    
  326. 
    
  327. static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) {
    
  328.   if (!strncmp(event, "LLC-load", 8) && isIntelE5_2670()) {
    
  329.     type = PERF_TYPE_RAW;
    
  330.     if (!strncmp(&event[4], "loads", 5)) {
    
  331.       config = 0x534f2e;
    
  332.     } else if (!strncmp(&event[4], "load-misses", 11)) {
    
  333.       config = 0x53412e;
    
  334.     }
    
  335.   }
    
  336. }
    
  337. 
    
  338. bool HardwareCounter::addPerfEvent(const char* event) {
    
  339.   uint32_t type = 0;
    
  340.   uint64_t config = 0;
    
  341.   int i, match_len;
    
  342.   bool found = false;
    
  343.   const char* ev = event;
    
  344. 
    
  345.   while ((i = findEvent(ev, perfTable,
    
  346.                         sizeof(perfTable)/sizeof(struct PerfTable),
    
  347.                         &match_len))
    
  348.        != -1) {
    
  349.     if (!found) {
    
  350.       found = true;
    
  351.       type = perfTable[i].type;
    
  352.     } else if (type != perfTable[i].type) {
    
  353.       // Logger::Warning("failed to find perf event: %s", event);
    
  354.       return false;
    
  355.     }
    
  356.     config |= perfTable[i].config;
    
  357.     ev = &ev[match_len];
    
  358.   }
    
  359. 
    
  360.   checkLLCHack(event, type, config);
    
  361. 
    
  362.   // Check if we have a raw spec.
    
  363.   if (!found && event[0] == 'r' && event[1] != 0) {
    
  364.     config = strtoull(event + 1, const_cast<char**>(&ev), 16);
    
  365.     if (*ev == 0) {
    
  366.       found = true;
    
  367.       type = PERF_TYPE_RAW;
    
  368.     }
    
  369.   }
    
  370. 
    
  371.   if (!found || *ev) {
    
  372.     // Logger::Warning("failed to find perf event: %s", event);
    
  373.     return false;
    
  374.   }
    
  375.   std::unique_ptr<HardwareCounterImpl> hwc(
    
  376.       new HardwareCounterImpl(type, config, event));
    
  377.   if (hwc->m_err) {
    
  378.     // Logger::Warning("failed to set perf event: %s", event);
    
  379.     return false;
    
  380.   }
    
  381.   m_counters.emplace_back(std::move(hwc));
    
  382.   if (!m_countersSet) {
    
  383.     // reset load and store counters. This is because
    
  384.     // perf does not seem to handle more than three counters
    
  385.     // very well.
    
  386.     m_loadCounter.reset();
    
  387.     m_storeCounter.reset();
    
  388.     m_countersSet = true;
    
  389.   }
    
  390.   return true;
    
  391. }
    
  392. 
    
  393. bool HardwareCounter::eventExists(const char *event) {
    
  394.   // hopefully m_counters set is small, so a linear scan does not hurt
    
  395.   for(unsigned i = 0; i < m_counters.size(); i++) {
    
  396.     if (!strcmp(event, m_counters[i]->m_desc.c_str())) {
    
  397.       return true;
    
  398.     }
    
  399.   }
    
  400.   return false;
    
  401. }
    
  402. 
    
  403. bool HardwareCounter::setPerfEvents(std::string sevents) {
    
  404.   // Make a copy of the string for use with strtok.
    
  405.   auto const sevents_buf = static_cast<char*>(malloc(sevents.size() + 1));
    
  406.   memcpy(sevents_buf, sevents.data(), sevents.size());
    
  407.   sevents_buf[sevents.size()] = '\0';
    
  408. 
    
  409.   char* strtok_buf = nullptr;
    
  410.   char* s = strtok_r(sevents_buf, ",", &strtok_buf);
    
  411.   bool success = true;
    
  412.   while (s) {
    
  413.     if (!eventExists(s) && !addPerfEvent(s)) {
    
  414.       success = false;
    
  415.       break;
    
  416.     }
    
  417.     s = strtok_r(nullptr, ",", &strtok_buf);
    
  418.   }
    
  419.   free(sevents_buf);
    
  420.   return success;
    
  421. }
    
  422. 
    
  423. bool HardwareCounter::SetPerfEvents(std::string events) {
    
  424.   return s_counter->setPerfEvents(events);
    
  425. }
    
  426. 
    
  427. void HardwareCounter::clearPerfEvents() {
    
  428.   m_counters.clear();
    
  429. }
    
  430. 
    
  431. void HardwareCounter::ClearPerfEvents() {
    
  432.   s_counter->clearPerfEvents();
    
  433. }
    
  434. 
    
  // Names reported by getPerfEvents() for the three built-in counters.
  const std::string s_instructions("instructions");
  const std::string s_loads("loads");
  const std::string s_stores("stores");
    
  439. 
    
  440. void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) {
    
  441.   f(s_instructions, getInstructionCount(), data);
    
  442.   if (!m_countersSet) {
    
  443.     f(s_loads, getLoadCount(), data);
    
  444.     f(s_stores, getStoreCount(), data);
    
  445.   }
    
  446.   for (unsigned i = 0; i < m_counters.size(); i++) {
    
  447.     f(m_counters[i]->m_desc, m_counters[i]->read(), data);
    
  448.   }
    
  449. }
    
  450. 
    
  451. void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) {
    
  452.   s_counter->getPerfEvents(f, data);
    
  453. }
    
  454. 
    
  455. ///////////////////////////////////////////////////////////////////////////////
    
  456. }
    
  457. 
    
  458. 
    
  459. #else // NO_HARDWARE_COUNTERS
    
  460. 
    
  461. namespace HPHP {
    
  462. ///////////////////////////////////////////////////////////////////////////////
    
  463. 
    
  464. HardwareCounter HardwareCounter::s_counter;
    
  465. 
    
  466. ///////////////////////////////////////////////////////////////////////////////
    
  467. }
    
  468. 
    
  469. #endif // NO_HARDWARE_COUNTERS