Programming Questions
Here are some coding challenges for Python, Spark, and Hadoop:
**Python:**
1. Tricky:
- Q: Implement a function in Python that takes a list of integers as input and returns a list of all
pairs of integers that sum up to a specific target value.
```python
def find_pairs(nums, target):
    pairs = []
    seen = set()
    for num in nums:
        complement = target - num
        if complement in seen:
            pairs.append((num, complement))
        seen.add(num)
    return pairs

# Example usage:
nums = [2, 7, 11, 15, 8, 3]
target = 10
print(find_pairs(nums, target))
```
**Spark:**
2. Tricky:
- Q: Write a Spark code to find the average length of words in a text file stored in HDFS.
```python
from pyspark import SparkContext
# Initialize SparkContext
sc = SparkContext()
# Stop SparkContext
sc.stop()
```
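The stub above only starts and stops the SparkContext. A minimal sketch of the missing logic, assuming the input file lives at a placeholder HDFS path:
```python
from pyspark import SparkContext

sc = SparkContext()

# Read the file from HDFS (placeholder path), split each line into words,
# and average the word lengths across the whole file
lines = sc.textFile("hdfs:///path/to/input.txt")
words = lines.flatMap(lambda line: line.split())
avg_length = words.map(lambda w: len(w)).mean()
print(avg_length)

sc.stop()
```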
**Hadoop:**
3. Tricky:
- Q: Write a MapReduce program in Java to find the maximum temperature recorded for each
year from a large dataset of temperature records.
```java
// Mapper class
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') {
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new Text(year), new IntWritable(airTemperature));
        }
    }
}

// Reducer class
public class MaxTemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int maxTemperature = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxTemperature = Math.max(maxTemperature, value.get());
        }
        context.write(key, new IntWritable(maxTemperature));
    }
}
```
**Python:**
4. Tricky:
- Q: Implement a function in Python that takes a string as input and returns the count of each
character in the string as a dictionary.
```python
def count_characters(s):
    counts = {}
    for char in s:
        counts[char] = counts.get(char, 0) + 1
    return counts

# Example usage:
s = "hello world"
print(count_characters(s))
```
5. Tricky:
- Q: Write a Python function to find the longest substring without repeating characters in a
given string.
```python
def longest_substring_without_repeating(s):
    seen = {}
    start = 0
    max_length = 0
    for i, char in enumerate(s):
        if char in seen and start <= seen[char]:
            start = seen[char] + 1
        else:
            max_length = max(max_length, i - start + 1)
        seen[char] = i
    return max_length

# Example usage:
s = "abcabcbb"
print(longest_substring_without_repeating(s))
```
**Spark:**
6. Tricky:
- Q: Write a Spark code to find the average temperature recorded for each year from a large
dataset of temperature records stored in HDFS.
```python
from pyspark import SparkContext
# Initialize SparkContext
sc = SparkContext()
# Stop SparkContext
sc.stop()
```
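As above, the snippet is only a skeleton. A possible completion, assuming each input line is a comma-separated `year,temperature` record and a placeholder HDFS path:
```python
from pyspark import SparkContext

sc = SparkContext()

# Read the records (placeholder path); each line is assumed to look like "1950,22.5"
records = sc.textFile("hdfs:///path/to/temperatures.txt")

# Map each record to (year, (temperature, 1)), sum per year, then divide to get the average
pairs = records.map(lambda line: line.split(",")) \
    .map(lambda fields: (fields[0], (float(fields[1]), 1)))
sums = pairs.reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))
averages = sums.mapValues(lambda total_count: total_count[0] / total_count[1])
print(averages.collect())

sc.stop()
```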
**Hadoop:**
7. Tricky:
- Q: Write a MapReduce program in Java to count the frequency of each word in a large text
document.
```java
// Mapper class
Public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
Private final static IntWritable one = new IntWritable(1);
Private Text word = new Text();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().split(\\s+);
For (String w : words) {
Word.set(w);
Context.write(word, one);
}
}
}
// Reducer class
Public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
Private IntWritable result = new IntWritable();
@Override
Public void reduce(Text key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
Result.set(sum);
Context.write(key, result);
}
}
```
**Python:**
8. Tricky:
- Q: Write a Python function to check if a given string is a palindrome or not, considering only
alphanumeric characters and ignoring case.
```python
def is_palindrome(s):
    s = ''.join(char.lower() for char in s if char.isalnum())
    return s == s[::-1]

# Example usage:
s = "A man, a plan, a canal: Panama"
print(is_palindrome(s))
```
9. Tricky:
- Q: Implement a function in Python that takes a list of integers as input and returns a list of all
unique triplets that sum up to zero.
```python
def three_sum(nums):
    triplets = []
    nums.sort()
    n = len(nums)
    for i in range(n):
        if i > 0 and nums[i] == nums[i - 1]:
            continue
        left, right = i + 1, n - 1
        while left < right:
            total = nums[i] + nums[left] + nums[right]
            if total < 0:
                left += 1
            elif total > 0:
                right -= 1
            else:
                triplets.append([nums[i], nums[left], nums[right]])
                while left < right and nums[left] == nums[left + 1]:
                    left += 1
                while left < right and nums[right] == nums[right - 1]:
                    right -= 1
                left += 1
                right -= 1
    return triplets

# Example usage:
nums = [-1, 0, 1, 2, -1, -4]
print(three_sum(nums))
```
**Spark:**
10. Tricky:
- Q: Write a Spark code to calculate the page rank of web pages using the iterative PageRank
algorithm.
```python
from pyspark import SparkContext
# Initialize SparkContext
sc = SparkContext()
# Stop SparkContext
sc.stop()
```
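The skeleton above omits the PageRank logic itself. A minimal iterative sketch, using a small made-up in-memory link list in place of real web-graph data (damping factor 0.85, 10 iterations):
```python
from pyspark import SparkContext

sc = SparkContext()

# Adjacency list: (page, [pages it links to]); a tiny illustrative graph
links = sc.parallelize([
    ("a", ["b", "c"]),
    ("b", ["c"]),
    ("c", ["a"]),
    ("d", ["c"]),
]).cache()

# Start every page with rank 1.0, then repeat the PageRank update a fixed number of times
ranks = links.mapValues(lambda neighbors: 1.0)
for _ in range(10):
    # Each page sends rank / out-degree to every page it links to
    contributions = links.join(ranks).flatMap(
        lambda page_data: [(dest, page_data[1][1] / len(page_data[1][0]))
                           for dest in page_data[1][0]]
    )
    # New rank = 0.15 + 0.85 * (sum of received contributions)
    ranks = contributions.reduceByKey(lambda x, y: x + y) \
        .mapValues(lambda contribution_sum: 0.15 + 0.85 * contribution_sum)

print(ranks.collect())

sc.stop()
```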
**Hadoop:**
11. Tricky:
- Q: Write a MapReduce program in Java to find the top 10 most frequent words in a large text
document.
```java
// Mapper class
public class TopWordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] words = value.toString().split("\\s+");
        for (String w : words) {
            word.set(w);
            context.write(word, one);
        }
    }
}

// Reducer class
public class TopWordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private TreeMap<Integer, String> topWords = new TreeMap<>();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        topWords.put(sum, key.toString());
        if (topWords.size() > 10) {
            topWords.remove(topWords.firstKey());
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<Integer, String> entry : topWords.descendingMap().entrySet()) {
            context.write(new Text(entry.getValue()), new IntWritable(entry.getKey()));
        }
    }
}
```
**Python:**
12. Tricky:
- Q: Write a Python function to find the longest common subsequence (LCS) of two given
strings.
```python
def longest_common_subsequence(s1, s2):
    m, n = len(s1), len(s2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i - 1] == s2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    lcs = []
    i, j = m, n
    while i > 0 and j > 0:
        if s1[i - 1] == s2[j - 1]:
            lcs.append(s1[i - 1])
            i -= 1
            j -= 1
        elif dp[i - 1][j] > dp[i][j - 1]:
            i -= 1
        else:
            j -= 1
    return ''.join(reversed(lcs))

# Example usage:
s1 = "ABCBDAB"
s2 = "BDCAB"
print(longest_common_subsequence(s1, s2))
```
13. Tricky:
- Q: Implement a function in Python to generate all valid parentheses combinations of given
length n.
```python
def generate_parentheses(n):
    def backtrack(s, left, right):
        if len(s) == 2 * n:
            parentheses.append(s)
            return
        if left < n:
            backtrack(s + '(', left + 1, right)
        if right < left:
            backtrack(s + ')', left, right + 1)

    parentheses = []
    backtrack('', 0, 0)
    return parentheses

# Example usage:
n = 3
print(generate_parentheses(n))
```
**Spark:**
14. Tricky:
- Q: Write a Spark code to find the top N most frequent words in a large text file, considering
case-insensitive word counts.
```python
from pyspark import SparkContext
# Initialize SparkContext
sc = SparkContext()
# Stop SparkContext
sc.stop()
```
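A possible completion of the skeleton above, assuming a placeholder HDFS path and N = 10:
```python
from pyspark import SparkContext

sc = SparkContext()

# Read the text file (placeholder path), lower-case every word, and count occurrences
lines = sc.textFile("hdfs:///path/to/input.txt")
word_counts = lines.flatMap(lambda line: line.split()) \
    .map(lambda word: (word.lower(), 1)) \
    .reduceByKey(lambda x, y: x + y)

# Take the N most frequent words (N = 10 here)
top_words = word_counts.takeOrdered(10, key=lambda pair: -pair[1])
print(top_words)

sc.stop()
```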
**Hadoop:**
15. Tricky:
- Q: Write a MapReduce program in Java to find the number of occurrences of each word in a
large text document, considering case-insensitive word counts.
```java
// Mapper class
Public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
Private final static IntWritable one = new IntWritable(1);
Private Text word = new Text();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().toLowerCase().split(\\s+);
For (String w : words) {
Word.set(w);
Context.write(word, one);
}
}
}
// Reducer class
Public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
Private IntWritable result = new IntWritable();
@Override
Public void reduce(Text key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
Result.set(sum);
Context.write(key, result);
}
}
```
**Python:**
16. Tricky:
- Q: Implement a Python function to find the kth smallest element in an unsorted array using
the quickselect algorithm.
```python
import random

def quickselect(nums, k):
    # Returns the kth smallest element (1-indexed) using quickselect
    def partition(left, right, pivot_index):
        pivot = nums[pivot_index]
        nums[pivot_index], nums[right] = nums[right], nums[pivot_index]
        store_index = left
        for i in range(left, right):
            if nums[i] < pivot:
                nums[i], nums[store_index] = nums[store_index], nums[i]
                store_index += 1
        nums[right], nums[store_index] = nums[store_index], nums[right]
        return store_index

    def select(left, right):
        if left == right:
            return nums[left]
        pivot_index = partition(left, right, random.randint(left, right))
        if pivot_index == k - 1:
            return nums[pivot_index]
        elif pivot_index > k - 1:
            return select(left, pivot_index - 1)
        else:
            return select(pivot_index + 1, right)

    return select(0, len(nums) - 1)

# Example usage:
nums = [3, 2, 1, 5, 6, 4]
k = 2
print(quickselect(nums, k))
```
17. Tricky:
- Q: Write a Python function to find the longest substring with at most two distinct characters
in a given string.
```python
def longest_substring_two_distinct(s):
    start = max_length = 0
    char_index_map = {}
    for i, char in enumerate(s):
        char_index_map[char] = i
        if len(char_index_map) > 2:
            min_index = min(char_index_map.values())
            del char_index_map[s[min_index]]
            start = min_index + 1
        max_length = max(max_length, i - start + 1)
    return max_length

# Example usage:
s = "eceba"
print(longest_substring_two_distinct(s))
```
**Spark:**
18. Tricky:
- Q: Write a Spark code to calculate the cosine similarity between two vectors using RDDs.
```python
from pyspark import SparkContext
import numpy as np

# Initialize SparkContext
sc = SparkContext()

# Define vectors
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])

# Create RDDs
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)

# Compute the dot product from the RDDs
dot_product = rdd1.zip(rdd2).map(lambda x: x[0] * x[1]).sum()

# Compute magnitudes
magnitude1 = np.linalg.norm(vec1)
magnitude2 = np.linalg.norm(vec2)

# Cosine similarity = dot product / (product of magnitudes)
cosine_similarity = dot_product / (magnitude1 * magnitude2)
print(cosine_similarity)

# Stop SparkContext
sc.stop()
```
**Hadoop:**
19. Tricky:
- Q: Write a MapReduce program in Java to find the median of a large dataset of integers.
```java
// Mapper class
Public class MedianMapper extends Mapper<LongWritable, Text, NullWritable, IntWritable> {
Private PriorityQueue<Integer> minHeap = new PriorityQueue<>();
Private PriorityQueue<Integer> maxHeap = new
PriorityQueue<>(Collections.reverseOrder());
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] tokens = value.toString().split(\\s+);
For (String token : tokens) {
Int num = Integer.parseInt(token);
If (maxHeap.isEmpty() || num <= maxHeap.peek()) {
maxHeap.add(num);
} else {
minHeap.add(num);
}
If (maxHeap.size() > minHeap.size() + 1) {
minHeap.add(maxHeap.poll());
} else if (minHeap.size() > maxHeap.size()) {
maxHeap.add(minHeap.poll());
}
}
}
@Override
Protected void cleanup(Context context) throws IOException, InterruptedException {
Int median;
If (maxHeap.size() == minHeap.size()) {
Median = (maxHeap.peek() + minHeap.peek()) / 2;
} else {
Median = maxHeap.peek();
}
Context.write(NullWritable.get(), new IntWritable(median));
}
}
```
**Python:**
20. Tricky:
- Q: Implement a Python function to generate all valid permutations of a given string.
```python
from itertools import permutations

def generate_permutations(s):
    return [''.join(permutation) for permutation in permutations(s)]

# Example usage:
s = "abc"
print(generate_permutations(s))
```
21. Tricky:
- Q: Write a Python function to check if a given string is an anagram of another string.
```python
def is_anagram(s1, s2):
    return sorted(s1) == sorted(s2)

# Example usage:
s1 = "listen"
s2 = "silent"
print(is_anagram(s1, s2))
```
**Spark:**
22. Tricky:
- Q: Write a Spark code to find the top N most frequent words in a large text file, considering
word counts and ignoring common stop words.
```python
from pyspark import SparkContext

# Initialize SparkContext
sc = SparkContext()

# Read the text file (placeholder path) and define a small illustrative stop-word set
lines = sc.textFile("hdfs:///path/to/input.txt")
stop_words = {"the", "a", "an", "and", "or", "of", "to", "in", "is", "it"}

# Split lines into words, filter stop words, and count frequency of each word
word_counts = lines.flatMap(lambda line: line.split()) \
    .filter(lambda word: word.lower() not in stop_words) \
    .map(lambda word: (word.lower(), 1)) \
    .reduceByKey(lambda x, y: x + y)

# Take the N most frequent remaining words (N = 10 here)
top_words = word_counts.takeOrdered(10, key=lambda pair: -pair[1])
print(top_words)

# Stop SparkContext
sc.stop()
```
**Hadoop:**
23. Tricky:
- Q: Write a MapReduce program in Java to find the most frequent word in a large text
document, ignoring common stop words.
```java
// Mapper class
Public class TopWordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
Private final static IntWritable one = new IntWritable(1);
Private Text word = new Text();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().toLowerCase().split(\\s+);
For (String w : words) {
If (!stopWords.contains(w)) {
Word.set(w);
Context.write(word, one);
}
}
}
}
// Reducer class
Public class TopWordReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
Private Text mostFrequentWord = new Text();
Private IntWritable maxCount = new IntWritable(Integer.MIN_VALUE);
@Override
Public void reduce(Text key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
If (sum > maxCount.get()) {
mostFrequentWord.set(key);
maxCount.set(sum);
}
}
@Override
Protected void cleanup(Context context) throws IOException, InterruptedException {
Context.write(mostFrequentWord, maxCount);
}
}
```
**Python:**
24. Tricky:
- Q: Implement a Python function to generate all valid IP addresses from a given string
consisting of digits.
```python
def restore_ip_addresses(s):
    def backtrack(start, parts):
        if len(parts) == 4:
            if start == len(s):
                ips.append('.'.join(parts))
            return
        for length in range(1, 4):
            if start + length > len(s):
                break
            part = s[start:start + length]
            if (length > 1 and part[0] == '0') or (length == 3 and int(part) > 255):
                continue
            backtrack(start + length, parts + [part])

    ips = []
    backtrack(0, [])
    return ips

# Example usage:
s = "25525511135"
print(restore_ip_addresses(s))
```
25. Tricky:
- Q: Write a Python function to find the longest substring with at most K distinct characters in
a given string.
```python
def longest_substring_k_distinct(s, k):
    start = max_length = 0
    char_index_map = {}
    for i, char in enumerate(s):
        char_index_map[char] = i
        if len(char_index_map) > k:
            min_index = min(char_index_map.values())
            del char_index_map[s[min_index]]
            start = min_index + 1
        max_length = max(max_length, i - start + 1)
    return max_length

# Example usage:
s = "eceba"
k = 2
print(longest_substring_k_distinct(s, k))
```
**Spark:**
26. Tricky:
- Q: Write a Spark code to calculate the Euclidean distance between two vectors using RDDs.
```python
from pyspark import SparkContext
import numpy as np
# Initialize SparkContext
sc = SparkContext()
# Define vectors
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
# Create RDDs
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)
# Compute squared difference for each coordinate, sum them, and take square root
euclidean_distance = np.sqrt(rdd1.zip(rdd2).map(lambda x: (x[0] - x[1]) ** 2).sum())
# Stop SparkContext
sc.stop()
```
**Hadoop:**
27. Tricky:
- Q: Write a MapReduce program in Java to find the most frequent word pair (bigram) in a
large text document.
```java
// Mapper class
public class TopBigramMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text bigram = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] words = value.toString().toLowerCase().split("\\s+");
        for (int i = 0; i < words.length - 1; i++) {
            bigram.set(words[i] + " " + words[i + 1]);
            context.write(bigram, one);
        }
    }
}

// Reducer class
public class TopBigramReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private Text mostFrequentBigram = new Text();
    private IntWritable maxCount = new IntWritable(Integer.MIN_VALUE);

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        if (sum > maxCount.get()) {
            mostFrequentBigram.set(key);
            maxCount.set(sum);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        context.write(mostFrequentBigram, maxCount);
    }
}
```
**Python:**
28. Tricky:
- Q: Implement a Python function to find the longest word in a dictionary that can be formed
by deleting some characters of another word.
```python
def longest_word_by_deleting(s, dictionary):
    def is_subsequence(word, target):
        i = j = 0
        while i < len(word) and j < len(target):
            if word[i] == target[j]:
                j += 1
            i += 1
        return j == len(target)

    longest = ""
    for word in dictionary:
        if is_subsequence(s, word) and len(word) > len(longest):
            longest = word
    return longest

# Example usage:
s = "abpcplea"
dictionary = ["ale", "apple", "monkey", "plea"]
print(longest_word_by_deleting(s, dictionary))
```
29. Tricky:
- Q: Write a Python function to find the longest common prefix string amongst an array of
strings.
```python
def longest_common_prefix(strs):
    if not strs:
        return ""
    min_length = min(len(s) for s in strs)
    prefix = ""
    for i in range(min_length):
        char = strs[0][i]
        if all(s[i] == char for s in strs):
            prefix += char
        else:
            break
    return prefix

# Example usage:
strs = ["flower", "flow", "flight"]
print(longest_common_prefix(strs))
```
**Spark:**
30. Tricky:
- Q: Write a Spark code to calculate the Manhattan distance between two vectors using RDDs.
```python
from pyspark import SparkContext
import numpy as np
# Initialize SparkContext
sc = SparkContext()
# Define vectors
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
# Create RDDs
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)
# Compute Manhattan distance
manhattan_distance = rdd1.zip(rdd2).map(lambda x: abs(x[0] - x[1])).sum()
# Stop SparkContext
sc.stop()
```
**Hadoop:**
31. Tricky:
- Q: Write a MapReduce program in Java to find the number of occurrences of each word
length in a large text document.
```java
// Mapper class
Public class WordLengthMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable>
{
Private final static IntWritable one = new IntWritable(1);
Private IntWritable wordLength = new IntWritable();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().split(\\s+);
For (String w : words) {
wordLength.set(w.length());
context.write(wordLength, one);
}
}
}
// Reducer class
Public class WordLengthReducer extends Reducer<IntWritable, IntWritable, IntWritable,
IntWritable> {
Private IntWritable totalCount = new IntWritable();
@Override
Public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
totalCount.set(sum);
context.write(key, totalCount);
}
}
```
**Python:**
32. Tricky:
- Q: Implement a Python function to find the longest palindrome substring in a given string.
```python
def longest_palindrome(s):
    def expand_from_center(left, right):
        while left >= 0 and right < len(s) and s[left] == s[right]:
            left -= 1
            right += 1
        return s[left + 1:right]

    longest = ""
    for i in range(len(s)):
        odd_palindrome = expand_from_center(i, i)
        even_palindrome = expand_from_center(i, i + 1)
        longest = max(longest, odd_palindrome, even_palindrome, key=len)
    return longest

# Example usage:
s = "babad"
print(longest_palindrome(s))
```
33. Tricky:
- Q: Write a Python function to find all valid combinations of k numbers that sum up to n
(unique numbers from 1 to 9).
```python
def combination_sum(k, n):
    def backtrack(start, target, path):
        if target == 0 and len(path) == k:
            combinations.append(path)
            return
        if target < 0 or len(path) == k:
            return
        for i in range(start, 10):
            backtrack(i + 1, target - i, path + [i])

    combinations = []
    backtrack(1, n, [])
    return combinations

# Example usage:
k = 3
n = 7
print(combination_sum(k, n))
```
**Spark:**
34. Tricky:
- Q: Write a Spark code to calculate the Jaccard similarity between two sets using RDDs.
```python
from pyspark import SparkContext
# Initialize SparkContext
sc = SparkContext()
# Define sets
set1 = set([1, 2, 3, 4, 5])
set2 = set([3, 4, 5, 6, 7])
# Create RDDs
rdd1 = sc.parallelize(set1)
rdd2 = sc.parallelize(set2)
# Stop SparkContext
sc.stop()
```
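The skeleton above defines the sets but never computes the similarity. A minimal sketch of the missing steps:
```python
from pyspark import SparkContext

sc = SparkContext()

# Define the two sets and distribute them as RDDs
set1 = set([1, 2, 3, 4, 5])
set2 = set([3, 4, 5, 6, 7])
rdd1 = sc.parallelize(set1)
rdd2 = sc.parallelize(set2)

# Jaccard similarity = |intersection| / |union|
intersection_size = rdd1.intersection(rdd2).count()
union_size = rdd1.union(rdd2).distinct().count()
jaccard_similarity = intersection_size / union_size
print(jaccard_similarity)  # 3 / 7 for these sets

sc.stop()
```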
**Hadoop:**
35. Tricky:
- Q: Write a MapReduce program in Java to find the number of occurrences of each vowel in a
large text document.
```java
// Mapper class
public class VowelCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text vowel = new Text();
@Override
public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().toLowerCase().split("\\s+");
for (String w : words) {
for (char c : w.toCharArray()) {
if ("aeiou".contains(String.valueOf(c))) {
vowel.set(String.valueOf(c));
context.write(vowel, one);
}
}
}
}
}
// Reducer class
public class VowelCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable totalCount = new IntWritable();
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
totalCount.set(sum);
context.write(key, totalCount);
}
}
```
**Python:**
36. Tricky:
- Q: Implement a Python function to find the longest substring without repeating characters in
a given string.
```python
def longest_substring_without_repeating(s):
    start = max_length = 0
    char_index_map = {}
    for i, char in enumerate(s):
        if char in char_index_map and start <= char_index_map[char]:
            start = char_index_map[char] + 1
        else:
            max_length = max(max_length, i - start + 1)
        char_index_map[char] = i
    return max_length

# Example usage:
s = "abcabcbb"
print(longest_substring_without_repeating(s))
```
37. Tricky:
- Q: Write a Python function to find all distinct combinations of a given size k in a given array
of integers, where each number is used exactly once.
```python
def combinations(nums, k):
    def backtrack(start, path):
        if len(path) == k:
            combinations.append(path)
            return
        for i in range(start, len(nums)):
            backtrack(i + 1, path + [nums[i]])

    combinations = []
    backtrack(0, [])
    return combinations

# Example usage:
nums = [1, 2, 3, 4]
k = 2
print(combinations(nums, k))
```
**Spark:**
38. Tricky:
- Q: Write a Spark code to find the top N most frequent pairs of words (bigrams) in a large text
file.
```python
from pyspark import SparkContext

# Initialize SparkContext
sc = SparkContext()

# Read the text file (placeholder path)
lines = sc.textFile("hdfs:///path/to/input.txt")

# Split each line into words, build bigrams within each line, and count each bigram
bigrams = lines.map(lambda line: line.split()) \
    .flatMap(lambda words: [((words[i], words[i + 1]), 1) for i in range(len(words) - 1)]) \
    .reduceByKey(lambda x, y: x + y)

# Take the N most frequent bigrams (N = 10 here)
top_bigrams = bigrams.takeOrdered(10, key=lambda pair: -pair[1])
print(top_bigrams)

# Stop SparkContext
sc.stop()
```
**Hadoop:**
39. Tricky:
- Q: Write a MapReduce program in Java to find the number of occurrences of each word
length in a large text document, considering only words with lengths greater than or equal to 5.
```java
// Mapper class
Public class LongWordLengthMapper extends Mapper<LongWritable, Text, IntWritable,
IntWritable> {
Private final static IntWritable one = new IntWritable(1);
Private IntWritable wordLength = new IntWritable();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().split(\\s+);
For (String w : words) {
If (w.length() >= 5) {
wordLength.set(w.length());
context.write(wordLength, one);
}
}
}
}
// Reducer class
Public class LongWordLengthReducer extends Reducer<IntWritable, IntWritable, IntWritable,
IntWritable> {
Private IntWritable totalCount = new IntWritable();
@Override
Public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
totalCount.set(sum);
context.write(key, totalCount);
}
}
```
**Python:**
40. Tricky:
- Q: Implement a Python function to find the longest substring with at least K repeating
characters in a given string.
```python
from collections import Counter

def longest_substring_k_repeating(s, k):
    def is_valid(substring):
        counts = Counter(substring)
        return all(count >= k for count in counts.values())

    max_length = 0
    for i in range(len(s)):
        for j in range(i + 1, len(s) + 1):
            if is_valid(s[i:j]):
                max_length = max(max_length, j - i)
    return max_length

# Example usage:
s = "aaabb"
k = 3
print(longest_substring_k_repeating(s, k))
```
41. Tricky:
- Q: Write a Python function to find the kth largest element in an unsorted array using the
quickselect algorithm.
```python
import random
# Example usage:
nums = [3, 2, 1, 5, 6, 4]
k = 2
print(quickselect(nums, k))
```
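The block above calls `quickselect` without defining it. One possible definition, a randomized quickselect for the kth largest element:
```python
import random

def quickselect(nums, k):
    # Partition around a random pivot and recurse into the side
    # that must contain the kth largest element
    pivot = random.choice(nums)
    larger = [n for n in nums if n > pivot]
    equal = [n for n in nums if n == pivot]
    smaller = [n for n in nums if n < pivot]
    if k <= len(larger):
        return quickselect(larger, k)
    elif k <= len(larger) + len(equal):
        return pivot
    else:
        return quickselect(smaller, k - len(larger) - len(equal))

# Example usage:
nums = [3, 2, 1, 5, 6, 4]
k = 2
print(quickselect(nums, k))  # 5
```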
**Spark:**
42. Tricky:
- Q: Write a Spark code to calculate the Pearson correlation coefficient between two vectors
using RDDs.
```python
from pyspark import SparkContext
import numpy as np
# Initialize SparkContext
sc = SparkContext()
# Define vectors
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
# Create RDDs
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)
# Stop SparkContext
sc.stop()
```
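The skeleton above stops before any computation. A minimal sketch of the Pearson correlation on RDDs, keeping the same small example vectors:
```python
from pyspark import SparkContext
import numpy as np

sc = SparkContext()

# Define vectors and distribute them as RDDs
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)

# Pearson r = sum((x - mean_x)(y - mean_y)) /
#             (sqrt(sum((x - mean_x)^2)) * sqrt(sum((y - mean_y)^2)))
mean1 = rdd1.mean()
mean2 = rdd2.mean()
numerator = rdd1.zip(rdd2).map(lambda p: (p[0] - mean1) * (p[1] - mean2)).sum()
denominator = np.sqrt(rdd1.map(lambda x: (x - mean1) ** 2).sum()) * \
              np.sqrt(rdd2.map(lambda y: (y - mean2) ** 2).sum())
pearson = numerator / denominator
print(pearson)  # 1.0 for these perfectly correlated vectors

sc.stop()
```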
**Hadoop:**
43. Tricky:
- Q: Write a MapReduce program in Java to find the number of occurrences of each word
length in a large text document, considering only words with lengths less than or equal to 10.
```java
// Mapper class
Public class ShortWordLengthMapper extends Mapper<LongWritable, Text, IntWritable,
IntWritable> {
Private final static IntWritable one = new IntWritable(1);
Private IntWritable wordLength = new IntWritable();
@Override
Public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException {
String[] words = value.toString().split(\\s+);
For (String w : words) {
If (w.length() <= 10) {
wordLength.set(w.length());
context.write(wordLength, one);
}
}
}
}
// Reducer class
Public class ShortWordLengthReducer extends Reducer<IntWritable, IntWritable, IntWritable,
IntWritable> {
Private IntWritable totalCount = new IntWritable();
@Override
Public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws
IOException, InterruptedException {
Int sum = 0;
For (IntWritable val : values) {
Sum += val.get();
}
totalCount.set(sum);
context.write(key, totalCount);
}
}
```
**Python:**
48. Tricky:
- Q: Implement a Python function to find the longest substring with at most K distinct characters in a given string.
```python
def longest_substring_k_distinct(s, k):
    start = max_length = 0
    char_count = {}
    for end, char in enumerate(s):
        char_count[char] = char_count.get(char, 0) + 1
        while len(char_count) > k:
            left_char = s[start]
            char_count[left_char] -= 1
            if char_count[left_char] == 0:
                del char_count[left_char]
            start += 1
        max_length = max(max_length, end - start + 1)
    return max_length

# Example usage:
s = "eceba"
k = 2
print(longest_substring_k_distinct(s, k))
```
49. Tricky:
- Q: Write a Python function to find the maximum sum of a contiguous subarray within a
given one-dimensional array of integers.
```python
def max_subarray_sum(nums):
    max_sum = curr_sum = nums[0]
    for num in nums[1:]:
        curr_sum = max(num, curr_sum + num)
        max_sum = max(max_sum, curr_sum)
    return max_sum

# Example usage:
nums = [-2, 1, -3, 4, -1, 2, 1, -5, 4]
print(max_subarray_sum(nums))
```
**Spark:**
50. Tricky:
- Q: Write a Spark code to calculate the cosine similarity between two vectors using RDDs.
```python
from pyspark import SparkContext
import numpy as np

# Initialize SparkContext
sc = SparkContext()

# Define vectors
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])

# Create RDDs
rdd1 = sc.parallelize(vec1)
rdd2 = sc.parallelize(vec2)

# Compute dot product
dot_product = rdd1.zip(rdd2).map(lambda x: x[0] * x[1]).sum()

# Compute magnitudes
magnitude1 = np.sqrt(rdd1.map(lambda x: x ** 2).sum())
magnitude2 = np.sqrt(rdd2.map(lambda x: x ** 2).sum())

# Cosine similarity = dot product / (product of magnitudes)
cosine_similarity = dot_product / (magnitude1 * magnitude2)
print(cosine_similarity)

# Stop SparkContext
sc.stop()
```